aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/pci/intel-iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/pci/intel-iommu.c')
-rw-r--r--drivers/pci/intel-iommu.c791
1 files changed, 425 insertions, 366 deletions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index e53eacd75c8..ebc9b8dca88 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -39,7 +39,6 @@
39#include <linux/sysdev.h> 39#include <linux/sysdev.h>
40#include <asm/cacheflush.h> 40#include <asm/cacheflush.h>
41#include <asm/iommu.h> 41#include <asm/iommu.h>
42#include <asm/e820.h>
43#include "pci.h" 42#include "pci.h"
44 43
45#define ROOT_SIZE VTD_PAGE_SIZE 44#define ROOT_SIZE VTD_PAGE_SIZE
@@ -57,14 +56,32 @@
57#define MAX_AGAW_WIDTH 64 56#define MAX_AGAW_WIDTH 64
58 57
59#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 58#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
59#define DOMAIN_MAX_PFN(gaw) ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
60 60
61#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 61#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
62#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) 62#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
63#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) 63#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
64 64
65#ifndef PHYSICAL_PAGE_MASK 65
66#define PHYSICAL_PAGE_MASK PAGE_MASK 66/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
67#endif 67 are never going to work. */
68static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
69{
70 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
71}
72
73static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
74{
75 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
76}
77static inline unsigned long page_to_dma_pfn(struct page *pg)
78{
79 return mm_to_dma_pfn(page_to_pfn(pg));
80}
81static inline unsigned long virt_to_dma_pfn(void *p)
82{
83 return page_to_dma_pfn(virt_to_page(p));
84}
68 85
69/* global iommu list, set NULL for ignored DMAR units */ 86/* global iommu list, set NULL for ignored DMAR units */
70static struct intel_iommu **g_iommus; 87static struct intel_iommu **g_iommus;
@@ -205,12 +222,17 @@ static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
205 222
206static inline u64 dma_pte_addr(struct dma_pte *pte) 223static inline u64 dma_pte_addr(struct dma_pte *pte)
207{ 224{
208 return (pte->val & VTD_PAGE_MASK); 225#ifdef CONFIG_64BIT
226 return pte->val & VTD_PAGE_MASK;
227#else
228 /* Must have a full atomic 64-bit read */
229 return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
230#endif
209} 231}
210 232
211static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr) 233static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
212{ 234{
213 pte->val |= (addr & VTD_PAGE_MASK); 235 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
214} 236}
215 237
216static inline bool dma_pte_present(struct dma_pte *pte) 238static inline bool dma_pte_present(struct dma_pte *pte)
@@ -218,6 +240,11 @@ static inline bool dma_pte_present(struct dma_pte *pte)
218 return (pte->val & 3) != 0; 240 return (pte->val & 3) != 0;
219} 241}
220 242
243static inline int first_pte_in_page(struct dma_pte *pte)
244{
245 return !((unsigned long)pte & ~VTD_PAGE_MASK);
246}
247
221/* 248/*
222 * This domain is a statically identity mapping domain. 249 * This domain is a statically identity mapping domain.
223 * 1. This domain creats a static 1:1 mapping to all usable memory. 250 * 1. This domain creats a static 1:1 mapping to all usable memory.
@@ -245,7 +272,6 @@ struct dmar_domain {
245 struct iova_domain iovad; /* iova's that belong to this domain */ 272 struct iova_domain iovad; /* iova's that belong to this domain */
246 273
247 struct dma_pte *pgd; /* virtual address */ 274 struct dma_pte *pgd; /* virtual address */
248 spinlock_t mapping_lock; /* page table lock */
249 int gaw; /* max guest address width */ 275 int gaw; /* max guest address width */
250 276
251 /* adjusted guest address width, 0 is level 2 30-bit */ 277 /* adjusted guest address width, 0 is level 2 30-bit */
@@ -649,80 +675,78 @@ static inline int width_to_agaw(int width)
649 675
650static inline unsigned int level_to_offset_bits(int level) 676static inline unsigned int level_to_offset_bits(int level)
651{ 677{
652 return (12 + (level - 1) * LEVEL_STRIDE); 678 return (level - 1) * LEVEL_STRIDE;
653} 679}
654 680
655static inline int address_level_offset(u64 addr, int level) 681static inline int pfn_level_offset(unsigned long pfn, int level)
656{ 682{
657 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK); 683 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
658} 684}
659 685
660static inline u64 level_mask(int level) 686static inline unsigned long level_mask(int level)
661{ 687{
662 return ((u64)-1 << level_to_offset_bits(level)); 688 return -1UL << level_to_offset_bits(level);
663} 689}
664 690
665static inline u64 level_size(int level) 691static inline unsigned long level_size(int level)
666{ 692{
667 return ((u64)1 << level_to_offset_bits(level)); 693 return 1UL << level_to_offset_bits(level);
668} 694}
669 695
670static inline u64 align_to_level(u64 addr, int level) 696static inline unsigned long align_to_level(unsigned long pfn, int level)
671{ 697{
672 return ((addr + level_size(level) - 1) & level_mask(level)); 698 return (pfn + level_size(level) - 1) & level_mask(level);
673} 699}
674 700
675static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) 701static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
702 unsigned long pfn)
676{ 703{
677 int addr_width = agaw_to_width(domain->agaw); 704 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
678 struct dma_pte *parent, *pte = NULL; 705 struct dma_pte *parent, *pte = NULL;
679 int level = agaw_to_level(domain->agaw); 706 int level = agaw_to_level(domain->agaw);
680 int offset; 707 int offset;
681 unsigned long flags;
682 708
683 BUG_ON(!domain->pgd); 709 BUG_ON(!domain->pgd);
684 710 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
685 addr &= (((u64)1) << addr_width) - 1;
686 parent = domain->pgd; 711 parent = domain->pgd;
687 712
688 spin_lock_irqsave(&domain->mapping_lock, flags);
689 while (level > 0) { 713 while (level > 0) {
690 void *tmp_page; 714 void *tmp_page;
691 715
692 offset = address_level_offset(addr, level); 716 offset = pfn_level_offset(pfn, level);
693 pte = &parent[offset]; 717 pte = &parent[offset];
694 if (level == 1) 718 if (level == 1)
695 break; 719 break;
696 720
697 if (!dma_pte_present(pte)) { 721 if (!dma_pte_present(pte)) {
722 uint64_t pteval;
723
698 tmp_page = alloc_pgtable_page(); 724 tmp_page = alloc_pgtable_page();
699 725
700 if (!tmp_page) { 726 if (!tmp_page)
701 spin_unlock_irqrestore(&domain->mapping_lock,
702 flags);
703 return NULL; 727 return NULL;
728
729 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
730 pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
731 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
732 /* Someone else set it while we were thinking; use theirs. */
733 free_pgtable_page(tmp_page);
734 } else {
735 dma_pte_addr(pte);
736 domain_flush_cache(domain, pte, sizeof(*pte));
704 } 737 }
705 domain_flush_cache(domain, tmp_page, PAGE_SIZE);
706 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
707 /*
708 * high level table always sets r/w, last level page
709 * table control read/write
710 */
711 dma_set_pte_readable(pte);
712 dma_set_pte_writable(pte);
713 domain_flush_cache(domain, pte, sizeof(*pte));
714 } 738 }
715 parent = phys_to_virt(dma_pte_addr(pte)); 739 parent = phys_to_virt(dma_pte_addr(pte));
716 level--; 740 level--;
717 } 741 }
718 742
719 spin_unlock_irqrestore(&domain->mapping_lock, flags);
720 return pte; 743 return pte;
721} 744}
722 745
723/* return address's pte at specific level */ 746/* return address's pte at specific level */
724static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, 747static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
725 int level) 748 unsigned long pfn,
749 int level)
726{ 750{
727 struct dma_pte *parent, *pte = NULL; 751 struct dma_pte *parent, *pte = NULL;
728 int total = agaw_to_level(domain->agaw); 752 int total = agaw_to_level(domain->agaw);
@@ -730,7 +754,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
730 754
731 parent = domain->pgd; 755 parent = domain->pgd;
732 while (level <= total) { 756 while (level <= total) {
733 offset = address_level_offset(addr, total); 757 offset = pfn_level_offset(pfn, total);
734 pte = &parent[offset]; 758 pte = &parent[offset];
735 if (level == total) 759 if (level == total)
736 return pte; 760 return pte;
@@ -743,74 +767,82 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
743 return NULL; 767 return NULL;
744} 768}
745 769
746/* clear one page's page table */
747static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
748{
749 struct dma_pte *pte = NULL;
750
751 /* get last level pte */
752 pte = dma_addr_level_pte(domain, addr, 1);
753
754 if (pte) {
755 dma_clear_pte(pte);
756 domain_flush_cache(domain, pte, sizeof(*pte));
757 }
758}
759
760/* clear last level pte, a tlb flush should be followed */ 770/* clear last level pte, a tlb flush should be followed */
761static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) 771static void dma_pte_clear_range(struct dmar_domain *domain,
772 unsigned long start_pfn,
773 unsigned long last_pfn)
762{ 774{
763 int addr_width = agaw_to_width(domain->agaw); 775 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
764 int npages; 776 struct dma_pte *first_pte, *pte;
765 777
766 start &= (((u64)1) << addr_width) - 1; 778 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
767 end &= (((u64)1) << addr_width) - 1; 779 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
768 /* in case it's partial page */
769 start &= PAGE_MASK;
770 end = PAGE_ALIGN(end);
771 npages = (end - start) / VTD_PAGE_SIZE;
772 780
773 /* we don't need lock here, nobody else touches the iova range */ 781 /* we don't need lock here; nobody else touches the iova range */
774 while (npages--) { 782 while (start_pfn <= last_pfn) {
775 dma_pte_clear_one(domain, start); 783 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
776 start += VTD_PAGE_SIZE; 784 if (!pte) {
785 start_pfn = align_to_level(start_pfn + 1, 2);
786 continue;
787 }
788 do {
789 dma_clear_pte(pte);
790 start_pfn++;
791 pte++;
792 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
793
794 domain_flush_cache(domain, first_pte,
795 (void *)pte - (void *)first_pte);
777 } 796 }
778} 797}
779 798
780/* free page table pages. last level pte should already be cleared */ 799/* free page table pages. last level pte should already be cleared */
781static void dma_pte_free_pagetable(struct dmar_domain *domain, 800static void dma_pte_free_pagetable(struct dmar_domain *domain,
782 u64 start, u64 end) 801 unsigned long start_pfn,
802 unsigned long last_pfn)
783{ 803{
784 int addr_width = agaw_to_width(domain->agaw); 804 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
785 struct dma_pte *pte; 805 struct dma_pte *first_pte, *pte;
786 int total = agaw_to_level(domain->agaw); 806 int total = agaw_to_level(domain->agaw);
787 int level; 807 int level;
788 u64 tmp; 808 unsigned long tmp;
789 809
790 start &= (((u64)1) << addr_width) - 1; 810 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
791 end &= (((u64)1) << addr_width) - 1; 811 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
792 812
793 /* we don't need lock here, nobody else touches the iova range */ 813 /* We don't need lock here; nobody else touches the iova range */
794 level = 2; 814 level = 2;
795 while (level <= total) { 815 while (level <= total) {
796 tmp = align_to_level(start, level); 816 tmp = align_to_level(start_pfn, level);
797 if (tmp >= end || (tmp + level_size(level) > end)) 817
818 /* If we can't even clear one PTE at this level, we're done */
819 if (tmp + level_size(level) - 1 > last_pfn)
798 return; 820 return;
799 821
800 while (tmp < end) { 822 while (tmp + level_size(level) - 1 <= last_pfn) {
801 pte = dma_addr_level_pte(domain, tmp, level); 823 first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
802 if (pte) { 824 if (!pte) {
803 free_pgtable_page( 825 tmp = align_to_level(tmp + 1, level + 1);
804 phys_to_virt(dma_pte_addr(pte))); 826 continue;
805 dma_clear_pte(pte);
806 domain_flush_cache(domain, pte, sizeof(*pte));
807 } 827 }
808 tmp += level_size(level); 828 do {
829 if (dma_pte_present(pte)) {
830 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
831 dma_clear_pte(pte);
832 }
833 pte++;
834 tmp += level_size(level);
835 } while (!first_pte_in_page(pte) &&
836 tmp + level_size(level) - 1 <= last_pfn);
837
838 domain_flush_cache(domain, first_pte,
839 (void *)pte - (void *)first_pte);
840
809 } 841 }
810 level++; 842 level++;
811 } 843 }
812 /* free pgd */ 844 /* free pgd */
813 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) { 845 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
814 free_pgtable_page(domain->pgd); 846 free_pgtable_page(domain->pgd);
815 domain->pgd = NULL; 847 domain->pgd = NULL;
816 } 848 }
@@ -1036,11 +1068,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1036} 1068}
1037 1069
1038static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, 1070static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1039 u64 addr, unsigned int pages) 1071 unsigned long pfn, unsigned int pages)
1040{ 1072{
1041 unsigned int mask = ilog2(__roundup_pow_of_two(pages)); 1073 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1074 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1042 1075
1043 BUG_ON(addr & (~VTD_PAGE_MASK));
1044 BUG_ON(pages == 0); 1076 BUG_ON(pages == 0);
1045 1077
1046 /* 1078 /*
@@ -1055,7 +1087,12 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1055 else 1087 else
1056 iommu->flush.flush_iotlb(iommu, did, addr, mask, 1088 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1057 DMA_TLB_PSI_FLUSH); 1089 DMA_TLB_PSI_FLUSH);
1058 if (did) 1090
1091 /*
1092 * In caching mode, domain ID 0 is reserved for non-present to present
1093 * mapping flush. Device IOTLB doesn't need to be flushed in this case.
1094 */
1095 if (!cap_caching_mode(iommu->cap) || did)
1059 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); 1096 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1060} 1097}
1061 1098
@@ -1280,7 +1317,6 @@ static void dmar_init_reserved_ranges(void)
1280 struct pci_dev *pdev = NULL; 1317 struct pci_dev *pdev = NULL;
1281 struct iova *iova; 1318 struct iova *iova;
1282 int i; 1319 int i;
1283 u64 addr, size;
1284 1320
1285 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); 1321 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1286 1322
@@ -1303,12 +1339,9 @@ static void dmar_init_reserved_ranges(void)
1303 r = &pdev->resource[i]; 1339 r = &pdev->resource[i];
1304 if (!r->flags || !(r->flags & IORESOURCE_MEM)) 1340 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1305 continue; 1341 continue;
1306 addr = r->start; 1342 iova = reserve_iova(&reserved_iova_list,
1307 addr &= PHYSICAL_PAGE_MASK; 1343 IOVA_PFN(r->start),
1308 size = r->end - addr; 1344 IOVA_PFN(r->end));
1309 size = PAGE_ALIGN(size);
1310 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1311 IOVA_PFN(size + addr) - 1);
1312 if (!iova) 1345 if (!iova)
1313 printk(KERN_ERR "Reserve iova failed\n"); 1346 printk(KERN_ERR "Reserve iova failed\n");
1314 } 1347 }
@@ -1342,7 +1375,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1342 unsigned long sagaw; 1375 unsigned long sagaw;
1343 1376
1344 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 1377 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1345 spin_lock_init(&domain->mapping_lock);
1346 spin_lock_init(&domain->iommu_lock); 1378 spin_lock_init(&domain->iommu_lock);
1347 1379
1348 domain_reserve_special_ranges(domain); 1380 domain_reserve_special_ranges(domain);
@@ -1389,7 +1421,6 @@ static void domain_exit(struct dmar_domain *domain)
1389{ 1421{
1390 struct dmar_drhd_unit *drhd; 1422 struct dmar_drhd_unit *drhd;
1391 struct intel_iommu *iommu; 1423 struct intel_iommu *iommu;
1392 u64 end;
1393 1424
1394 /* Domain 0 is reserved, so dont process it */ 1425 /* Domain 0 is reserved, so dont process it */
1395 if (!domain) 1426 if (!domain)
@@ -1398,14 +1429,12 @@ static void domain_exit(struct dmar_domain *domain)
1398 domain_remove_dev_info(domain); 1429 domain_remove_dev_info(domain);
1399 /* destroy iovas */ 1430 /* destroy iovas */
1400 put_iova_domain(&domain->iovad); 1431 put_iova_domain(&domain->iovad);
1401 end = DOMAIN_MAX_ADDR(domain->gaw);
1402 end = end & (~PAGE_MASK);
1403 1432
1404 /* clear ptes */ 1433 /* clear ptes */
1405 dma_pte_clear_range(domain, 0, end); 1434 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1406 1435
1407 /* free page tables */ 1436 /* free page tables */
1408 dma_pte_free_pagetable(domain, 0, end); 1437 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1409 1438
1410 for_each_active_iommu(iommu, drhd) 1439 for_each_active_iommu(iommu, drhd)
1411 if (test_bit(iommu->seq_id, &domain->iommu_bmp)) 1440 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
@@ -1619,42 +1648,86 @@ static int domain_context_mapped(struct pci_dev *pdev)
1619 tmp->devfn); 1648 tmp->devfn);
1620} 1649}
1621 1650
1622static int 1651static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1623domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, 1652 struct scatterlist *sg, unsigned long phys_pfn,
1624 u64 hpa, size_t size, int prot) 1653 unsigned long nr_pages, int prot)
1625{ 1654{
1626 u64 start_pfn, end_pfn; 1655 struct dma_pte *first_pte = NULL, *pte = NULL;
1627 struct dma_pte *pte; 1656 phys_addr_t uninitialized_var(pteval);
1628 int index; 1657 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1629 int addr_width = agaw_to_width(domain->agaw); 1658 unsigned long sg_res;
1630 1659
1631 hpa &= (((u64)1) << addr_width) - 1; 1660 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1632 1661
1633 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) 1662 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1634 return -EINVAL; 1663 return -EINVAL;
1635 iova &= PAGE_MASK; 1664
1636 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT; 1665 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1637 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT; 1666
1638 index = 0; 1667 if (sg)
1639 while (start_pfn < end_pfn) { 1668 sg_res = 0;
1640 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index); 1669 else {
1641 if (!pte) 1670 sg_res = nr_pages + 1;
1642 return -ENOMEM; 1671 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1672 }
1673
1674 while (nr_pages--) {
1675 uint64_t tmp;
1676
1677 if (!sg_res) {
1678 sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;
1679 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1680 sg->dma_length = sg->length;
1681 pteval = page_to_phys(sg_page(sg)) | prot;
1682 }
1683 if (!pte) {
1684 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
1685 if (!pte)
1686 return -ENOMEM;
1687 }
1643 /* We don't need lock here, nobody else 1688 /* We don't need lock here, nobody else
1644 * touches the iova range 1689 * touches the iova range
1645 */ 1690 */
1646 BUG_ON(dma_pte_addr(pte)); 1691 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1647 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); 1692 if (tmp) {
1648 dma_set_pte_prot(pte, prot); 1693 static int dumps = 5;
1649 if (prot & DMA_PTE_SNP) 1694 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1650 dma_set_pte_snp(pte); 1695 iov_pfn, tmp, (unsigned long long)pteval);
1651 domain_flush_cache(domain, pte, sizeof(*pte)); 1696 if (dumps) {
1652 start_pfn++; 1697 dumps--;
1653 index++; 1698 debug_dma_dump_mappings(NULL);
1699 }
1700 WARN_ON(1);
1701 }
1702 pte++;
1703 if (!nr_pages || first_pte_in_page(pte)) {
1704 domain_flush_cache(domain, first_pte,
1705 (void *)pte - (void *)first_pte);
1706 pte = NULL;
1707 }
1708 iov_pfn++;
1709 pteval += VTD_PAGE_SIZE;
1710 sg_res--;
1711 if (!sg_res)
1712 sg = sg_next(sg);
1654 } 1713 }
1655 return 0; 1714 return 0;
1656} 1715}
1657 1716
1717static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1718 struct scatterlist *sg, unsigned long nr_pages,
1719 int prot)
1720{
1721 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1722}
1723
1724static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1725 unsigned long phys_pfn, unsigned long nr_pages,
1726 int prot)
1727{
1728 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1729}
1730
1658static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) 1731static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1659{ 1732{
1660 if (!iommu) 1733 if (!iommu)
@@ -1845,58 +1918,61 @@ error:
1845 1918
1846static int iommu_identity_mapping; 1919static int iommu_identity_mapping;
1847 1920
1921static int iommu_domain_identity_map(struct dmar_domain *domain,
1922 unsigned long long start,
1923 unsigned long long end)
1924{
1925 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
1926 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
1927
1928 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
1929 dma_to_mm_pfn(last_vpfn))) {
1930 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1931 return -ENOMEM;
1932 }
1933
1934 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1935 start, end, domain->id);
1936 /*
1937 * RMRR range might have overlap with physical memory range,
1938 * clear it first
1939 */
1940 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
1941
1942 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
1943 last_vpfn - first_vpfn + 1,
1944 DMA_PTE_READ|DMA_PTE_WRITE);
1945}
1946
1848static int iommu_prepare_identity_map(struct pci_dev *pdev, 1947static int iommu_prepare_identity_map(struct pci_dev *pdev,
1849 unsigned long long start, 1948 unsigned long long start,
1850 unsigned long long end) 1949 unsigned long long end)
1851{ 1950{
1852 struct dmar_domain *domain; 1951 struct dmar_domain *domain;
1853 unsigned long size;
1854 unsigned long long base;
1855 int ret; 1952 int ret;
1856 1953
1857 printk(KERN_INFO 1954 printk(KERN_INFO
1858 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", 1955 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1859 pci_name(pdev), start, end); 1956 pci_name(pdev), start, end);
1860 if (iommu_identity_mapping) 1957
1861 domain = si_domain; 1958 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1862 else
1863 /* page table init */
1864 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1865 if (!domain) 1959 if (!domain)
1866 return -ENOMEM; 1960 return -ENOMEM;
1867 1961
1868 /* The address might not be aligned */ 1962 ret = iommu_domain_identity_map(domain, start, end);
1869 base = start & PAGE_MASK;
1870 size = end - base;
1871 size = PAGE_ALIGN(size);
1872 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1873 IOVA_PFN(base + size) - 1)) {
1874 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1875 ret = -ENOMEM;
1876 goto error;
1877 }
1878
1879 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1880 size, base, pci_name(pdev));
1881 /*
1882 * RMRR range might have overlap with physical memory range,
1883 * clear it first
1884 */
1885 dma_pte_clear_range(domain, base, base + size);
1886
1887 ret = domain_page_mapping(domain, base, base, size,
1888 DMA_PTE_READ|DMA_PTE_WRITE);
1889 if (ret) 1963 if (ret)
1890 goto error; 1964 goto error;
1891 1965
1892 /* context entry init */ 1966 /* context entry init */
1893 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); 1967 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
1894 if (!ret) 1968 if (ret)
1895 return 0; 1969 goto error;
1896error: 1970
1971 return 0;
1972
1973 error:
1897 domain_exit(domain); 1974 domain_exit(domain);
1898 return ret; 1975 return ret;
1899
1900} 1976}
1901 1977
1902static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, 1978static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
@@ -1908,64 +1984,6 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1908 rmrr->end_address + 1); 1984 rmrr->end_address + 1);
1909} 1985}
1910 1986
1911#ifdef CONFIG_DMAR_GFX_WA
1912struct iommu_prepare_data {
1913 struct pci_dev *pdev;
1914 int ret;
1915};
1916
1917static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1918 unsigned long end_pfn, void *datax)
1919{
1920 struct iommu_prepare_data *data;
1921
1922 data = (struct iommu_prepare_data *)datax;
1923
1924 data->ret = iommu_prepare_identity_map(data->pdev,
1925 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1926 return data->ret;
1927
1928}
1929
1930static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1931{
1932 int nid;
1933 struct iommu_prepare_data data;
1934
1935 data.pdev = pdev;
1936 data.ret = 0;
1937
1938 for_each_online_node(nid) {
1939 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1940 if (data.ret)
1941 return data.ret;
1942 }
1943 return data.ret;
1944}
1945
1946static void __init iommu_prepare_gfx_mapping(void)
1947{
1948 struct pci_dev *pdev = NULL;
1949 int ret;
1950
1951 for_each_pci_dev(pdev) {
1952 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1953 !IS_GFX_DEVICE(pdev))
1954 continue;
1955 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1956 pci_name(pdev));
1957 ret = iommu_prepare_with_active_regions(pdev);
1958 if (ret)
1959 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1960 }
1961}
1962#else /* !CONFIG_DMAR_GFX_WA */
1963static inline void iommu_prepare_gfx_mapping(void)
1964{
1965 return;
1966}
1967#endif
1968
1969#ifdef CONFIG_DMAR_FLOPPY_WA 1987#ifdef CONFIG_DMAR_FLOPPY_WA
1970static inline void iommu_prepare_isa(void) 1988static inline void iommu_prepare_isa(void)
1971{ 1989{
@@ -1976,12 +1994,12 @@ static inline void iommu_prepare_isa(void)
1976 if (!pdev) 1994 if (!pdev)
1977 return; 1995 return;
1978 1996
1979 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n"); 1997 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
1980 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); 1998 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1981 1999
1982 if (ret) 2000 if (ret)
1983 printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, " 2001 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
1984 "floppy might not work\n"); 2002 "floppy might not work\n");
1985 2003
1986} 2004}
1987#else 2005#else
@@ -2009,16 +2027,30 @@ static int __init init_context_pass_through(void)
2009} 2027}
2010 2028
2011static int md_domain_init(struct dmar_domain *domain, int guest_width); 2029static int md_domain_init(struct dmar_domain *domain, int guest_width);
2030
2031static int __init si_domain_work_fn(unsigned long start_pfn,
2032 unsigned long end_pfn, void *datax)
2033{
2034 int *ret = datax;
2035
2036 *ret = iommu_domain_identity_map(si_domain,
2037 (uint64_t)start_pfn << PAGE_SHIFT,
2038 (uint64_t)end_pfn << PAGE_SHIFT);
2039 return *ret;
2040
2041}
2042
2012static int si_domain_init(void) 2043static int si_domain_init(void)
2013{ 2044{
2014 struct dmar_drhd_unit *drhd; 2045 struct dmar_drhd_unit *drhd;
2015 struct intel_iommu *iommu; 2046 struct intel_iommu *iommu;
2016 int ret = 0; 2047 int nid, ret = 0;
2017 2048
2018 si_domain = alloc_domain(); 2049 si_domain = alloc_domain();
2019 if (!si_domain) 2050 if (!si_domain)
2020 return -EFAULT; 2051 return -EFAULT;
2021 2052
2053 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2022 2054
2023 for_each_active_iommu(iommu, drhd) { 2055 for_each_active_iommu(iommu, drhd) {
2024 ret = iommu_attach_domain(si_domain, iommu); 2056 ret = iommu_attach_domain(si_domain, iommu);
@@ -2035,6 +2067,12 @@ static int si_domain_init(void)
2035 2067
2036 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; 2068 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2037 2069
2070 for_each_online_node(nid) {
2071 work_with_active_regions(nid, si_domain_work_fn, &ret);
2072 if (ret)
2073 return ret;
2074 }
2075
2038 return 0; 2076 return 0;
2039} 2077}
2040 2078
@@ -2079,9 +2117,49 @@ static int domain_add_dev_info(struct dmar_domain *domain,
2079 return 0; 2117 return 0;
2080} 2118}
2081 2119
2120static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2121{
2122 if (iommu_identity_mapping == 2)
2123 return IS_GFX_DEVICE(pdev);
2124
2125 /*
2126 * We want to start off with all devices in the 1:1 domain, and
2127 * take them out later if we find they can't access all of memory.
2128 *
2129 * However, we can't do this for PCI devices behind bridges,
2130 * because all PCI devices behind the same bridge will end up
2131 * with the same source-id on their transactions.
2132 *
2133 * Practically speaking, we can't change things around for these
2134 * devices at run-time, because we can't be sure there'll be no
2135 * DMA transactions in flight for any of their siblings.
2136 *
2137 * So PCI devices (unless they're on the root bus) as well as
2138 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2139 * the 1:1 domain, just in _case_ one of their siblings turns out
2140 * not to be able to map all of memory.
2141 */
2142 if (!pdev->is_pcie) {
2143 if (!pci_is_root_bus(pdev->bus))
2144 return 0;
2145 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2146 return 0;
2147 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2148 return 0;
2149
2150 /*
2151 * At boot time, we don't yet know if devices will be 64-bit capable.
2152 * Assume that they will -- if they turn out not to be, then we can
2153 * take them out of the 1:1 domain later.
2154 */
2155 if (!startup)
2156 return pdev->dma_mask > DMA_BIT_MASK(32);
2157
2158 return 1;
2159}
2160
2082static int iommu_prepare_static_identity_mapping(void) 2161static int iommu_prepare_static_identity_mapping(void)
2083{ 2162{
2084 int i;
2085 struct pci_dev *pdev = NULL; 2163 struct pci_dev *pdev = NULL;
2086 int ret; 2164 int ret;
2087 2165
@@ -2089,23 +2167,19 @@ static int iommu_prepare_static_identity_mapping(void)
2089 if (ret) 2167 if (ret)
2090 return -EFAULT; 2168 return -EFAULT;
2091 2169
2092 printk(KERN_INFO "IOMMU: Setting identity map:\n");
2093 for_each_pci_dev(pdev) { 2170 for_each_pci_dev(pdev) {
2094 for (i = 0; i < e820.nr_map; i++) { 2171 if (iommu_should_identity_map(pdev, 1)) {
2095 struct e820entry *ei = &e820.map[i]; 2172 printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
2096 2173 pci_name(pdev));
2097 if (ei->type == E820_RAM) { 2174
2098 ret = iommu_prepare_identity_map(pdev, 2175 ret = domain_context_mapping(si_domain, pdev,
2099 ei->addr, ei->addr + ei->size); 2176 CONTEXT_TT_MULTI_LEVEL);
2100 if (ret) { 2177 if (ret)
2101 printk(KERN_INFO "1:1 mapping to one domain failed.\n"); 2178 return ret;
2102 return -EFAULT; 2179 ret = domain_add_dev_info(si_domain, pdev);
2103 } 2180 if (ret)
2104 } 2181 return ret;
2105 } 2182 }
2106 ret = domain_add_dev_info(si_domain, pdev);
2107 if (ret)
2108 return ret;
2109 } 2183 }
2110 2184
2111 return 0; 2185 return 0;
@@ -2260,6 +2334,10 @@ int __init init_dmars(void)
2260 * identity mapping if iommu_identity_mapping is set. 2334 * identity mapping if iommu_identity_mapping is set.
2261 */ 2335 */
2262 if (!iommu_pass_through) { 2336 if (!iommu_pass_through) {
2337#ifdef CONFIG_DMAR_BROKEN_GFX_WA
2338 if (!iommu_identity_mapping)
2339 iommu_identity_mapping = 2;
2340#endif
2263 if (iommu_identity_mapping) 2341 if (iommu_identity_mapping)
2264 iommu_prepare_static_identity_mapping(); 2342 iommu_prepare_static_identity_mapping();
2265 /* 2343 /*
@@ -2293,8 +2371,6 @@ int __init init_dmars(void)
2293 } 2371 }
2294 } 2372 }
2295 2373
2296 iommu_prepare_gfx_mapping();
2297
2298 iommu_prepare_isa(); 2374 iommu_prepare_isa();
2299 } 2375 }
2300 2376
@@ -2339,50 +2415,40 @@ error:
2339 return ret; 2415 return ret;
2340} 2416}
2341 2417
2342static inline u64 aligned_size(u64 host_addr, size_t size) 2418/* Returns a number of VTD pages, but aligned to MM page size */
2419static inline unsigned long aligned_nrpages(unsigned long host_addr,
2420 size_t size)
2343{ 2421{
2344 u64 addr; 2422 host_addr &= ~PAGE_MASK;
2345 addr = (host_addr & (~PAGE_MASK)) + size; 2423 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2346 return PAGE_ALIGN(addr);
2347}
2348
2349struct iova *
2350iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2351{
2352 struct iova *piova;
2353
2354 /* Make sure it's in range */
2355 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2356 if (!size || (IOVA_START_ADDR + size > end))
2357 return NULL;
2358
2359 piova = alloc_iova(&domain->iovad,
2360 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2361 return piova;
2362} 2424}
2363 2425
2364static struct iova * 2426/* This takes a number of _MM_ pages, not VTD pages */
2365__intel_alloc_iova(struct device *dev, struct dmar_domain *domain, 2427static struct iova *intel_alloc_iova(struct device *dev,
2366 size_t size, u64 dma_mask) 2428 struct dmar_domain *domain,
2429 unsigned long nrpages, uint64_t dma_mask)
2367{ 2430{
2368 struct pci_dev *pdev = to_pci_dev(dev); 2431 struct pci_dev *pdev = to_pci_dev(dev);
2369 struct iova *iova = NULL; 2432 struct iova *iova = NULL;
2370 2433
2371 if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac) 2434 /* Restrict dma_mask to the width that the iommu can handle */
2372 iova = iommu_alloc_iova(domain, size, dma_mask); 2435 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2373 else { 2436
2437 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2374 /* 2438 /*
2375 * First try to allocate an io virtual address in 2439 * First try to allocate an io virtual address in
2376 * DMA_BIT_MASK(32) and if that fails then try allocating 2440 * DMA_BIT_MASK(32) and if that fails then try allocating
2377 * from higher range 2441 * from higher range
2378 */ 2442 */
2379 iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32)); 2443 iova = alloc_iova(&domain->iovad, nrpages,
2380 if (!iova) 2444 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2381 iova = iommu_alloc_iova(domain, size, dma_mask); 2445 if (iova)
2382 } 2446 return iova;
2383 2447 }
2384 if (!iova) { 2448 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2385 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); 2449 if (unlikely(!iova)) {
2450 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2451 nrpages, pci_name(pdev));
2386 return NULL; 2452 return NULL;
2387 } 2453 }
2388 2454
@@ -2424,16 +2490,24 @@ static int iommu_dummy(struct pci_dev *pdev)
2424} 2490}
2425 2491
2426/* Check if the pdev needs to go through non-identity map and unmap process.*/ 2492/* Check if the pdev needs to go through non-identity map and unmap process.*/
2427static int iommu_no_mapping(struct pci_dev *pdev) 2493static int iommu_no_mapping(struct device *dev)
2428{ 2494{
2495 struct pci_dev *pdev;
2429 int found; 2496 int found;
2430 2497
2498 if (unlikely(dev->bus != &pci_bus_type))
2499 return 1;
2500
2501 pdev = to_pci_dev(dev);
2502 if (iommu_dummy(pdev))
2503 return 1;
2504
2431 if (!iommu_identity_mapping) 2505 if (!iommu_identity_mapping)
2432 return iommu_dummy(pdev); 2506 return 0;
2433 2507
2434 found = identity_mapping(pdev); 2508 found = identity_mapping(pdev);
2435 if (found) { 2509 if (found) {
2436 if (pdev->dma_mask > DMA_BIT_MASK(32)) 2510 if (iommu_should_identity_map(pdev, 0))
2437 return 1; 2511 return 1;
2438 else { 2512 else {
2439 /* 2513 /*
@@ -2450,9 +2524,12 @@ static int iommu_no_mapping(struct pci_dev *pdev)
2450 * In case of a detached 64 bit DMA device from vm, the device 2524 * In case of a detached 64 bit DMA device from vm, the device
2451 * is put into si_domain for identity mapping. 2525 * is put into si_domain for identity mapping.
2452 */ 2526 */
2453 if (pdev->dma_mask > DMA_BIT_MASK(32)) { 2527 if (iommu_should_identity_map(pdev, 0)) {
2454 int ret; 2528 int ret;
2455 ret = domain_add_dev_info(si_domain, pdev); 2529 ret = domain_add_dev_info(si_domain, pdev);
2530 if (ret)
2531 return 0;
2532 ret = domain_context_mapping(si_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2456 if (!ret) { 2533 if (!ret) {
2457 printk(KERN_INFO "64bit %s uses identity mapping\n", 2534 printk(KERN_INFO "64bit %s uses identity mapping\n",
2458 pci_name(pdev)); 2535 pci_name(pdev));
@@ -2461,7 +2538,7 @@ static int iommu_no_mapping(struct pci_dev *pdev)
2461 } 2538 }
2462 } 2539 }
2463 2540
2464 return iommu_dummy(pdev); 2541 return 0;
2465} 2542}
2466 2543
2467static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, 2544static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
@@ -2477,7 +2554,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2477 2554
2478 BUG_ON(dir == DMA_NONE); 2555 BUG_ON(dir == DMA_NONE);
2479 2556
2480 if (iommu_no_mapping(pdev)) 2557 if (iommu_no_mapping(hwdev))
2481 return paddr; 2558 return paddr;
2482 2559
2483 domain = get_valid_domain_for_dev(pdev); 2560 domain = get_valid_domain_for_dev(pdev);
@@ -2485,14 +2562,13 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2485 return 0; 2562 return 0;
2486 2563
2487 iommu = domain_get_iommu(domain); 2564 iommu = domain_get_iommu(domain);
2488 size = aligned_size((u64)paddr, size); 2565 size = aligned_nrpages(paddr, size);
2489 2566
2490 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2567 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2568 pdev->dma_mask);
2491 if (!iova) 2569 if (!iova)
2492 goto error; 2570 goto error;
2493 2571
2494 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2495
2496 /* 2572 /*
2497 * Check if DMAR supports zero-length reads on write only 2573 * Check if DMAR supports zero-length reads on write only
2498 * mappings.. 2574 * mappings..
@@ -2508,20 +2584,20 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2508 * might have two guest_addr mapping to the same host paddr, but this 2584 * might have two guest_addr mapping to the same host paddr, but this
2509 * is not a big problem 2585 * is not a big problem
2510 */ 2586 */
2511 ret = domain_page_mapping(domain, start_paddr, 2587 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2512 ((u64)paddr) & PHYSICAL_PAGE_MASK, 2588 paddr >> VTD_PAGE_SHIFT, size, prot);
2513 size, prot);
2514 if (ret) 2589 if (ret)
2515 goto error; 2590 goto error;
2516 2591
2517 /* it's a non-present to present mapping. Only flush if caching mode */ 2592 /* it's a non-present to present mapping. Only flush if caching mode */
2518 if (cap_caching_mode(iommu->cap)) 2593 if (cap_caching_mode(iommu->cap))
2519 iommu_flush_iotlb_psi(iommu, 0, start_paddr, 2594 iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size);
2520 size >> VTD_PAGE_SHIFT);
2521 else 2595 else
2522 iommu_flush_write_buffer(iommu); 2596 iommu_flush_write_buffer(iommu);
2523 2597
2524 return start_paddr + ((u64)paddr & (~PAGE_MASK)); 2598 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2599 start_paddr += paddr & ~PAGE_MASK;
2600 return start_paddr;
2525 2601
2526error: 2602error:
2527 if (iova) 2603 if (iova)
@@ -2614,11 +2690,11 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2614{ 2690{
2615 struct pci_dev *pdev = to_pci_dev(dev); 2691 struct pci_dev *pdev = to_pci_dev(dev);
2616 struct dmar_domain *domain; 2692 struct dmar_domain *domain;
2617 unsigned long start_addr; 2693 unsigned long start_pfn, last_pfn;
2618 struct iova *iova; 2694 struct iova *iova;
2619 struct intel_iommu *iommu; 2695 struct intel_iommu *iommu;
2620 2696
2621 if (iommu_no_mapping(pdev)) 2697 if (iommu_no_mapping(dev))
2622 return; 2698 return;
2623 2699
2624 domain = find_domain(pdev); 2700 domain = find_domain(pdev);
@@ -2627,22 +2703,25 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2627 iommu = domain_get_iommu(domain); 2703 iommu = domain_get_iommu(domain);
2628 2704
2629 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); 2705 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2630 if (!iova) 2706 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2707 (unsigned long long)dev_addr))
2631 return; 2708 return;
2632 2709
2633 start_addr = iova->pfn_lo << PAGE_SHIFT; 2710 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2634 size = aligned_size((u64)dev_addr, size); 2711 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2635 2712
2636 pr_debug("Device %s unmapping: %zx@%llx\n", 2713 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2637 pci_name(pdev), size, (unsigned long long)start_addr); 2714 pci_name(pdev), start_pfn, last_pfn);
2638 2715
2639 /* clear the whole page */ 2716 /* clear the whole page */
2640 dma_pte_clear_range(domain, start_addr, start_addr + size); 2717 dma_pte_clear_range(domain, start_pfn, last_pfn);
2718
2641 /* free page tables */ 2719 /* free page tables */
2642 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2720 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2721
2643 if (intel_iommu_strict) { 2722 if (intel_iommu_strict) {
2644 iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2723 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2645 size >> VTD_PAGE_SHIFT); 2724 last_pfn - start_pfn + 1);
2646 /* free iova */ 2725 /* free iova */
2647 __free_iova(&domain->iovad, iova); 2726 __free_iova(&domain->iovad, iova);
2648 } else { 2727 } else {
@@ -2700,17 +2779,13 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2700 int nelems, enum dma_data_direction dir, 2779 int nelems, enum dma_data_direction dir,
2701 struct dma_attrs *attrs) 2780 struct dma_attrs *attrs)
2702{ 2781{
2703 int i;
2704 struct pci_dev *pdev = to_pci_dev(hwdev); 2782 struct pci_dev *pdev = to_pci_dev(hwdev);
2705 struct dmar_domain *domain; 2783 struct dmar_domain *domain;
2706 unsigned long start_addr; 2784 unsigned long start_pfn, last_pfn;
2707 struct iova *iova; 2785 struct iova *iova;
2708 size_t size = 0;
2709 phys_addr_t addr;
2710 struct scatterlist *sg;
2711 struct intel_iommu *iommu; 2786 struct intel_iommu *iommu;
2712 2787
2713 if (iommu_no_mapping(pdev)) 2788 if (iommu_no_mapping(hwdev))
2714 return; 2789 return;
2715 2790
2716 domain = find_domain(pdev); 2791 domain = find_domain(pdev);
@@ -2719,22 +2794,21 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2719 iommu = domain_get_iommu(domain); 2794 iommu = domain_get_iommu(domain);
2720 2795
2721 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); 2796 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2722 if (!iova) 2797 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2798 (unsigned long long)sglist[0].dma_address))
2723 return; 2799 return;
2724 for_each_sg(sglist, sg, nelems, i) {
2725 addr = page_to_phys(sg_page(sg)) + sg->offset;
2726 size += aligned_size((u64)addr, sg->length);
2727 }
2728 2800
2729 start_addr = iova->pfn_lo << PAGE_SHIFT; 2801 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2802 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2730 2803
2731 /* clear the whole page */ 2804 /* clear the whole page */
2732 dma_pte_clear_range(domain, start_addr, start_addr + size); 2805 dma_pte_clear_range(domain, start_pfn, last_pfn);
2806
2733 /* free page tables */ 2807 /* free page tables */
2734 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2808 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2735 2809
2736 iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2810 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2737 size >> VTD_PAGE_SHIFT); 2811 (last_pfn - start_pfn + 1));
2738 2812
2739 /* free iova */ 2813 /* free iova */
2740 __free_iova(&domain->iovad, iova); 2814 __free_iova(&domain->iovad, iova);
@@ -2757,21 +2831,20 @@ static int intel_nontranslate_map_sg(struct device *hddev,
2757static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, 2831static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2758 enum dma_data_direction dir, struct dma_attrs *attrs) 2832 enum dma_data_direction dir, struct dma_attrs *attrs)
2759{ 2833{
2760 phys_addr_t addr;
2761 int i; 2834 int i;
2762 struct pci_dev *pdev = to_pci_dev(hwdev); 2835 struct pci_dev *pdev = to_pci_dev(hwdev);
2763 struct dmar_domain *domain; 2836 struct dmar_domain *domain;
2764 size_t size = 0; 2837 size_t size = 0;
2765 int prot = 0; 2838 int prot = 0;
2766 size_t offset = 0; 2839 size_t offset_pfn = 0;
2767 struct iova *iova = NULL; 2840 struct iova *iova = NULL;
2768 int ret; 2841 int ret;
2769 struct scatterlist *sg; 2842 struct scatterlist *sg;
2770 unsigned long start_addr; 2843 unsigned long start_vpfn;
2771 struct intel_iommu *iommu; 2844 struct intel_iommu *iommu;
2772 2845
2773 BUG_ON(dir == DMA_NONE); 2846 BUG_ON(dir == DMA_NONE);
2774 if (iommu_no_mapping(pdev)) 2847 if (iommu_no_mapping(hwdev))
2775 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); 2848 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2776 2849
2777 domain = get_valid_domain_for_dev(pdev); 2850 domain = get_valid_domain_for_dev(pdev);
@@ -2780,12 +2853,11 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2780 2853
2781 iommu = domain_get_iommu(domain); 2854 iommu = domain_get_iommu(domain);
2782 2855
2783 for_each_sg(sglist, sg, nelems, i) { 2856 for_each_sg(sglist, sg, nelems, i)
2784 addr = page_to_phys(sg_page(sg)) + sg->offset; 2857 size += aligned_nrpages(sg->offset, sg->length);
2785 size += aligned_size((u64)addr, sg->length);
2786 }
2787 2858
2788 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2859 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2860 pdev->dma_mask);
2789 if (!iova) { 2861 if (!iova) {
2790 sglist->dma_length = 0; 2862 sglist->dma_length = 0;
2791 return 0; 2863 return 0;
@@ -2801,35 +2873,24 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2801 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2873 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2802 prot |= DMA_PTE_WRITE; 2874 prot |= DMA_PTE_WRITE;
2803 2875
2804 start_addr = iova->pfn_lo << PAGE_SHIFT; 2876 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
2805 offset = 0; 2877
2806 for_each_sg(sglist, sg, nelems, i) { 2878 ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot);
2807 addr = page_to_phys(sg_page(sg)) + sg->offset; 2879 if (unlikely(ret)) {
2808 size = aligned_size((u64)addr, sg->length); 2880 /* clear the page */
2809 ret = domain_page_mapping(domain, start_addr + offset, 2881 dma_pte_clear_range(domain, start_vpfn,
2810 ((u64)addr) & PHYSICAL_PAGE_MASK, 2882 start_vpfn + size - 1);
2811 size, prot); 2883 /* free page tables */
2812 if (ret) { 2884 dma_pte_free_pagetable(domain, start_vpfn,
2813 /* clear the page */ 2885 start_vpfn + size - 1);
2814 dma_pte_clear_range(domain, start_addr, 2886 /* free iova */
2815 start_addr + offset); 2887 __free_iova(&domain->iovad, iova);
2816 /* free page tables */ 2888 return 0;
2817 dma_pte_free_pagetable(domain, start_addr,
2818 start_addr + offset);
2819 /* free iova */
2820 __free_iova(&domain->iovad, iova);
2821 return 0;
2822 }
2823 sg->dma_address = start_addr + offset +
2824 ((u64)addr & (~PAGE_MASK));
2825 sg->dma_length = sg->length;
2826 offset += size;
2827 } 2889 }
2828 2890
2829 /* it's a non-present to present mapping. Only flush if caching mode */ 2891 /* it's a non-present to present mapping. Only flush if caching mode */
2830 if (cap_caching_mode(iommu->cap)) 2892 if (cap_caching_mode(iommu->cap))
2831 iommu_flush_iotlb_psi(iommu, 0, start_addr, 2893 iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn);
2832 offset >> VTD_PAGE_SHIFT);
2833 else 2894 else
2834 iommu_flush_write_buffer(iommu); 2895 iommu_flush_write_buffer(iommu);
2835 2896
@@ -3334,7 +3395,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
3334 int adjust_width; 3395 int adjust_width;
3335 3396
3336 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 3397 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3337 spin_lock_init(&domain->mapping_lock);
3338 spin_lock_init(&domain->iommu_lock); 3398 spin_lock_init(&domain->iommu_lock);
3339 3399
3340 domain_reserve_special_ranges(domain); 3400 domain_reserve_special_ranges(domain);
@@ -3388,8 +3448,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain)
3388 3448
3389static void vm_domain_exit(struct dmar_domain *domain) 3449static void vm_domain_exit(struct dmar_domain *domain)
3390{ 3450{
3391 u64 end;
3392
3393 /* Domain 0 is reserved, so dont process it */ 3451 /* Domain 0 is reserved, so dont process it */
3394 if (!domain) 3452 if (!domain)
3395 return; 3453 return;
@@ -3397,14 +3455,12 @@ static void vm_domain_exit(struct dmar_domain *domain)
3397 vm_domain_remove_all_dev_info(domain); 3455 vm_domain_remove_all_dev_info(domain);
3398 /* destroy iovas */ 3456 /* destroy iovas */
3399 put_iova_domain(&domain->iovad); 3457 put_iova_domain(&domain->iovad);
3400 end = DOMAIN_MAX_ADDR(domain->gaw);
3401 end = end & (~VTD_PAGE_MASK);
3402 3458
3403 /* clear ptes */ 3459 /* clear ptes */
3404 dma_pte_clear_range(domain, 0, end); 3460 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3405 3461
3406 /* free page tables */ 3462 /* free page tables */
3407 dma_pte_free_pagetable(domain, 0, end); 3463 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3408 3464
3409 iommu_free_vm_domain(domain); 3465 iommu_free_vm_domain(domain);
3410 free_domain_mem(domain); 3466 free_domain_mem(domain);
@@ -3513,7 +3569,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3513 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) 3569 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3514 prot |= DMA_PTE_SNP; 3570 prot |= DMA_PTE_SNP;
3515 3571
3516 max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); 3572 max_addr = iova + size;
3517 if (dmar_domain->max_addr < max_addr) { 3573 if (dmar_domain->max_addr < max_addr) {
3518 int min_agaw; 3574 int min_agaw;
3519 u64 end; 3575 u64 end;
@@ -3531,8 +3587,11 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3531 } 3587 }
3532 dmar_domain->max_addr = max_addr; 3588 dmar_domain->max_addr = max_addr;
3533 } 3589 }
3534 3590 /* Round up size to next multiple of PAGE_SIZE, if it and
3535 ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); 3591 the low bits of hpa would take us onto the next page */
3592 size = aligned_nrpages(hpa, size);
3593 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3594 hpa >> VTD_PAGE_SHIFT, size, prot);
3536 return ret; 3595 return ret;
3537} 3596}
3538 3597
@@ -3540,15 +3599,15 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain,
3540 unsigned long iova, size_t size) 3599 unsigned long iova, size_t size)
3541{ 3600{
3542 struct dmar_domain *dmar_domain = domain->priv; 3601 struct dmar_domain *dmar_domain = domain->priv;
3543 dma_addr_t base;
3544 3602
3545 /* The address might not be aligned */ 3603 if (!size)
3546 base = iova & VTD_PAGE_MASK; 3604 return;
3547 size = VTD_PAGE_ALIGN(size); 3605
3548 dma_pte_clear_range(dmar_domain, base, base + size); 3606 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3607 (iova + size - 1) >> VTD_PAGE_SHIFT);
3549 3608
3550 if (dmar_domain->max_addr == base + size) 3609 if (dmar_domain->max_addr == iova + size)
3551 dmar_domain->max_addr = base; 3610 dmar_domain->max_addr = iova;
3552} 3611}
3553 3612
3554static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 3613static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -3558,7 +3617,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3558 struct dma_pte *pte; 3617 struct dma_pte *pte;
3559 u64 phys = 0; 3618 u64 phys = 0;
3560 3619
3561 pte = addr_to_dma_pte(dmar_domain, iova); 3620 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
3562 if (pte) 3621 if (pte)
3563 phys = dma_pte_addr(pte); 3622 phys = dma_pte_addr(pte);
3564 3623