aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/pci/intel-iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/pci/intel-iommu.c')
-rw-r--r--drivers/pci/intel-iommu.c704
1 files changed, 350 insertions, 354 deletions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index e53eacd75c8d..53075424a434 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -39,7 +39,6 @@
39#include <linux/sysdev.h> 39#include <linux/sysdev.h>
40#include <asm/cacheflush.h> 40#include <asm/cacheflush.h>
41#include <asm/iommu.h> 41#include <asm/iommu.h>
42#include <asm/e820.h>
43#include "pci.h" 42#include "pci.h"
44 43
45#define ROOT_SIZE VTD_PAGE_SIZE 44#define ROOT_SIZE VTD_PAGE_SIZE
@@ -57,14 +56,32 @@
57#define MAX_AGAW_WIDTH 64 56#define MAX_AGAW_WIDTH 64
58 57
59#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 58#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
59#define DOMAIN_MAX_PFN(gaw) ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
60 60
61#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 61#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
62#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) 62#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
63#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) 63#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
64 64
65#ifndef PHYSICAL_PAGE_MASK 65
66#define PHYSICAL_PAGE_MASK PAGE_MASK 66/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
67#endif 67 are never going to work. */
68static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
69{
70 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
71}
72
73static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
74{
75 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
76}
77static inline unsigned long page_to_dma_pfn(struct page *pg)
78{
79 return mm_to_dma_pfn(page_to_pfn(pg));
80}
81static inline unsigned long virt_to_dma_pfn(void *p)
82{
83 return page_to_dma_pfn(virt_to_page(p));
84}
68 85
69/* global iommu list, set NULL for ignored DMAR units */ 86/* global iommu list, set NULL for ignored DMAR units */
70static struct intel_iommu **g_iommus; 87static struct intel_iommu **g_iommus;
@@ -205,12 +222,17 @@ static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
205 222
206static inline u64 dma_pte_addr(struct dma_pte *pte) 223static inline u64 dma_pte_addr(struct dma_pte *pte)
207{ 224{
208 return (pte->val & VTD_PAGE_MASK); 225#ifdef CONFIG_64BIT
226 return pte->val & VTD_PAGE_MASK;
227#else
228 /* Must have a full atomic 64-bit read */
229 return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
230#endif
209} 231}
210 232
211static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr) 233static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
212{ 234{
213 pte->val |= (addr & VTD_PAGE_MASK); 235 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
214} 236}
215 237
216static inline bool dma_pte_present(struct dma_pte *pte) 238static inline bool dma_pte_present(struct dma_pte *pte)
@@ -218,6 +240,11 @@ static inline bool dma_pte_present(struct dma_pte *pte)
218 return (pte->val & 3) != 0; 240 return (pte->val & 3) != 0;
219} 241}
220 242
243static inline int first_pte_in_page(struct dma_pte *pte)
244{
245 return !((unsigned long)pte & ~VTD_PAGE_MASK);
246}
247
221/* 248/*
222 * This domain is a statically identity mapping domain. 249 * This domain is a statically identity mapping domain.
223 * 1. This domain creats a static 1:1 mapping to all usable memory. 250 * 1. This domain creats a static 1:1 mapping to all usable memory.
@@ -245,7 +272,6 @@ struct dmar_domain {
245 struct iova_domain iovad; /* iova's that belong to this domain */ 272 struct iova_domain iovad; /* iova's that belong to this domain */
246 273
247 struct dma_pte *pgd; /* virtual address */ 274 struct dma_pte *pgd; /* virtual address */
248 spinlock_t mapping_lock; /* page table lock */
249 int gaw; /* max guest address width */ 275 int gaw; /* max guest address width */
250 276
251 /* adjusted guest address width, 0 is level 2 30-bit */ 277 /* adjusted guest address width, 0 is level 2 30-bit */
@@ -649,80 +675,78 @@ static inline int width_to_agaw(int width)
649 675
650static inline unsigned int level_to_offset_bits(int level) 676static inline unsigned int level_to_offset_bits(int level)
651{ 677{
652 return (12 + (level - 1) * LEVEL_STRIDE); 678 return (level - 1) * LEVEL_STRIDE;
653} 679}
654 680
655static inline int address_level_offset(u64 addr, int level) 681static inline int pfn_level_offset(unsigned long pfn, int level)
656{ 682{
657 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK); 683 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
658} 684}
659 685
660static inline u64 level_mask(int level) 686static inline unsigned long level_mask(int level)
661{ 687{
662 return ((u64)-1 << level_to_offset_bits(level)); 688 return -1UL << level_to_offset_bits(level);
663} 689}
664 690
665static inline u64 level_size(int level) 691static inline unsigned long level_size(int level)
666{ 692{
667 return ((u64)1 << level_to_offset_bits(level)); 693 return 1UL << level_to_offset_bits(level);
668} 694}
669 695
670static inline u64 align_to_level(u64 addr, int level) 696static inline unsigned long align_to_level(unsigned long pfn, int level)
671{ 697{
672 return ((addr + level_size(level) - 1) & level_mask(level)); 698 return (pfn + level_size(level) - 1) & level_mask(level);
673} 699}
674 700
675static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) 701static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
702 unsigned long pfn)
676{ 703{
677 int addr_width = agaw_to_width(domain->agaw); 704 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
678 struct dma_pte *parent, *pte = NULL; 705 struct dma_pte *parent, *pte = NULL;
679 int level = agaw_to_level(domain->agaw); 706 int level = agaw_to_level(domain->agaw);
680 int offset; 707 int offset;
681 unsigned long flags;
682 708
683 BUG_ON(!domain->pgd); 709 BUG_ON(!domain->pgd);
684 710 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
685 addr &= (((u64)1) << addr_width) - 1;
686 parent = domain->pgd; 711 parent = domain->pgd;
687 712
688 spin_lock_irqsave(&domain->mapping_lock, flags);
689 while (level > 0) { 713 while (level > 0) {
690 void *tmp_page; 714 void *tmp_page;
691 715
692 offset = address_level_offset(addr, level); 716 offset = pfn_level_offset(pfn, level);
693 pte = &parent[offset]; 717 pte = &parent[offset];
694 if (level == 1) 718 if (level == 1)
695 break; 719 break;
696 720
697 if (!dma_pte_present(pte)) { 721 if (!dma_pte_present(pte)) {
722 uint64_t pteval;
723
698 tmp_page = alloc_pgtable_page(); 724 tmp_page = alloc_pgtable_page();
699 725
700 if (!tmp_page) { 726 if (!tmp_page)
701 spin_unlock_irqrestore(&domain->mapping_lock,
702 flags);
703 return NULL; 727 return NULL;
728
729 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
730 pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
731 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
732 /* Someone else set it while we were thinking; use theirs. */
733 free_pgtable_page(tmp_page);
734 } else {
735 dma_pte_addr(pte);
736 domain_flush_cache(domain, pte, sizeof(*pte));
704 } 737 }
705 domain_flush_cache(domain, tmp_page, PAGE_SIZE);
706 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
707 /*
708 * high level table always sets r/w, last level page
709 * table control read/write
710 */
711 dma_set_pte_readable(pte);
712 dma_set_pte_writable(pte);
713 domain_flush_cache(domain, pte, sizeof(*pte));
714 } 738 }
715 parent = phys_to_virt(dma_pte_addr(pte)); 739 parent = phys_to_virt(dma_pte_addr(pte));
716 level--; 740 level--;
717 } 741 }
718 742
719 spin_unlock_irqrestore(&domain->mapping_lock, flags);
720 return pte; 743 return pte;
721} 744}
722 745
723/* return address's pte at specific level */ 746/* return address's pte at specific level */
724static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, 747static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
725 int level) 748 unsigned long pfn,
749 int level)
726{ 750{
727 struct dma_pte *parent, *pte = NULL; 751 struct dma_pte *parent, *pte = NULL;
728 int total = agaw_to_level(domain->agaw); 752 int total = agaw_to_level(domain->agaw);
@@ -730,7 +754,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
730 754
731 parent = domain->pgd; 755 parent = domain->pgd;
732 while (level <= total) { 756 while (level <= total) {
733 offset = address_level_offset(addr, total); 757 offset = pfn_level_offset(pfn, total);
734 pte = &parent[offset]; 758 pte = &parent[offset];
735 if (level == total) 759 if (level == total)
736 return pte; 760 return pte;
@@ -743,74 +767,82 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
743 return NULL; 767 return NULL;
744} 768}
745 769
746/* clear one page's page table */
747static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
748{
749 struct dma_pte *pte = NULL;
750
751 /* get last level pte */
752 pte = dma_addr_level_pte(domain, addr, 1);
753
754 if (pte) {
755 dma_clear_pte(pte);
756 domain_flush_cache(domain, pte, sizeof(*pte));
757 }
758}
759
760/* clear last level pte, a tlb flush should be followed */ 770/* clear last level pte, a tlb flush should be followed */
761static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) 771static void dma_pte_clear_range(struct dmar_domain *domain,
772 unsigned long start_pfn,
773 unsigned long last_pfn)
762{ 774{
763 int addr_width = agaw_to_width(domain->agaw); 775 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
764 int npages; 776 struct dma_pte *first_pte, *pte;
777
778 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
779 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
765 780
766 start &= (((u64)1) << addr_width) - 1; 781 /* we don't need lock here; nobody else touches the iova range */
767 end &= (((u64)1) << addr_width) - 1; 782 while (start_pfn <= last_pfn) {
768 /* in case it's partial page */ 783 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
769 start &= PAGE_MASK; 784 if (!pte) {
770 end = PAGE_ALIGN(end); 785 start_pfn = align_to_level(start_pfn + 1, 2);
771 npages = (end - start) / VTD_PAGE_SIZE; 786 continue;
787 }
788 do {
789 dma_clear_pte(pte);
790 start_pfn++;
791 pte++;
792 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
772 793
773 /* we don't need lock here, nobody else touches the iova range */ 794 domain_flush_cache(domain, first_pte,
774 while (npages--) { 795 (void *)pte - (void *)first_pte);
775 dma_pte_clear_one(domain, start);
776 start += VTD_PAGE_SIZE;
777 } 796 }
778} 797}
779 798
780/* free page table pages. last level pte should already be cleared */ 799/* free page table pages. last level pte should already be cleared */
781static void dma_pte_free_pagetable(struct dmar_domain *domain, 800static void dma_pte_free_pagetable(struct dmar_domain *domain,
782 u64 start, u64 end) 801 unsigned long start_pfn,
802 unsigned long last_pfn)
783{ 803{
784 int addr_width = agaw_to_width(domain->agaw); 804 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
785 struct dma_pte *pte; 805 struct dma_pte *first_pte, *pte;
786 int total = agaw_to_level(domain->agaw); 806 int total = agaw_to_level(domain->agaw);
787 int level; 807 int level;
788 u64 tmp; 808 unsigned long tmp;
789 809
790 start &= (((u64)1) << addr_width) - 1; 810 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
791 end &= (((u64)1) << addr_width) - 1; 811 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
792 812
793 /* we don't need lock here, nobody else touches the iova range */ 813 /* We don't need lock here; nobody else touches the iova range */
794 level = 2; 814 level = 2;
795 while (level <= total) { 815 while (level <= total) {
796 tmp = align_to_level(start, level); 816 tmp = align_to_level(start_pfn, level);
797 if (tmp >= end || (tmp + level_size(level) > end)) 817
818 /* If we can't even clear one PTE at this level, we're done */
819 if (tmp + level_size(level) - 1 > last_pfn)
798 return; 820 return;
799 821
800 while (tmp < end) { 822 while (tmp + level_size(level) - 1 <= last_pfn) {
801 pte = dma_addr_level_pte(domain, tmp, level); 823 first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
802 if (pte) { 824 if (!pte) {
803 free_pgtable_page( 825 tmp = align_to_level(tmp + 1, level + 1);
804 phys_to_virt(dma_pte_addr(pte))); 826 continue;
805 dma_clear_pte(pte);
806 domain_flush_cache(domain, pte, sizeof(*pte));
807 } 827 }
808 tmp += level_size(level); 828 do {
829 if (dma_pte_present(pte)) {
830 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
831 dma_clear_pte(pte);
832 }
833 pte++;
834 tmp += level_size(level);
835 } while (!first_pte_in_page(pte) &&
836 tmp + level_size(level) - 1 <= last_pfn);
837
838 domain_flush_cache(domain, first_pte,
839 (void *)pte - (void *)first_pte);
840
809 } 841 }
810 level++; 842 level++;
811 } 843 }
812 /* free pgd */ 844 /* free pgd */
813 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) { 845 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
814 free_pgtable_page(domain->pgd); 846 free_pgtable_page(domain->pgd);
815 domain->pgd = NULL; 847 domain->pgd = NULL;
816 } 848 }
@@ -1036,11 +1068,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1036} 1068}
1037 1069
1038static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, 1070static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1039 u64 addr, unsigned int pages) 1071 unsigned long pfn, unsigned int pages)
1040{ 1072{
1041 unsigned int mask = ilog2(__roundup_pow_of_two(pages)); 1073 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1074 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1042 1075
1043 BUG_ON(addr & (~VTD_PAGE_MASK));
1044 BUG_ON(pages == 0); 1076 BUG_ON(pages == 0);
1045 1077
1046 /* 1078 /*
@@ -1055,7 +1087,12 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1055 else 1087 else
1056 iommu->flush.flush_iotlb(iommu, did, addr, mask, 1088 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1057 DMA_TLB_PSI_FLUSH); 1089 DMA_TLB_PSI_FLUSH);
1058 if (did) 1090
1091 /*
1092 * In caching mode, domain ID 0 is reserved for non-present to present
1093 * mapping flush. Device IOTLB doesn't need to be flushed in this case.
1094 */
1095 if (!cap_caching_mode(iommu->cap) || did)
1059 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); 1096 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1060} 1097}
1061 1098
@@ -1280,7 +1317,6 @@ static void dmar_init_reserved_ranges(void)
1280 struct pci_dev *pdev = NULL; 1317 struct pci_dev *pdev = NULL;
1281 struct iova *iova; 1318 struct iova *iova;
1282 int i; 1319 int i;
1283 u64 addr, size;
1284 1320
1285 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); 1321 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1286 1322
@@ -1303,12 +1339,9 @@ static void dmar_init_reserved_ranges(void)
1303 r = &pdev->resource[i]; 1339 r = &pdev->resource[i];
1304 if (!r->flags || !(r->flags & IORESOURCE_MEM)) 1340 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1305 continue; 1341 continue;
1306 addr = r->start; 1342 iova = reserve_iova(&reserved_iova_list,
1307 addr &= PHYSICAL_PAGE_MASK; 1343 IOVA_PFN(r->start),
1308 size = r->end - addr; 1344 IOVA_PFN(r->end));
1309 size = PAGE_ALIGN(size);
1310 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1311 IOVA_PFN(size + addr) - 1);
1312 if (!iova) 1345 if (!iova)
1313 printk(KERN_ERR "Reserve iova failed\n"); 1346 printk(KERN_ERR "Reserve iova failed\n");
1314 } 1347 }
@@ -1342,7 +1375,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1342 unsigned long sagaw; 1375 unsigned long sagaw;
1343 1376
1344 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 1377 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1345 spin_lock_init(&domain->mapping_lock);
1346 spin_lock_init(&domain->iommu_lock); 1378 spin_lock_init(&domain->iommu_lock);
1347 1379
1348 domain_reserve_special_ranges(domain); 1380 domain_reserve_special_ranges(domain);
@@ -1389,7 +1421,6 @@ static void domain_exit(struct dmar_domain *domain)
1389{ 1421{
1390 struct dmar_drhd_unit *drhd; 1422 struct dmar_drhd_unit *drhd;
1391 struct intel_iommu *iommu; 1423 struct intel_iommu *iommu;
1392 u64 end;
1393 1424
1394 /* Domain 0 is reserved, so dont process it */ 1425 /* Domain 0 is reserved, so dont process it */
1395 if (!domain) 1426 if (!domain)
@@ -1398,14 +1429,12 @@ static void domain_exit(struct dmar_domain *domain)
1398 domain_remove_dev_info(domain); 1429 domain_remove_dev_info(domain);
1399 /* destroy iovas */ 1430 /* destroy iovas */
1400 put_iova_domain(&domain->iovad); 1431 put_iova_domain(&domain->iovad);
1401 end = DOMAIN_MAX_ADDR(domain->gaw);
1402 end = end & (~PAGE_MASK);
1403 1432
1404 /* clear ptes */ 1433 /* clear ptes */
1405 dma_pte_clear_range(domain, 0, end); 1434 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1406 1435
1407 /* free page tables */ 1436 /* free page tables */
1408 dma_pte_free_pagetable(domain, 0, end); 1437 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1409 1438
1410 for_each_active_iommu(iommu, drhd) 1439 for_each_active_iommu(iommu, drhd)
1411 if (test_bit(iommu->seq_id, &domain->iommu_bmp)) 1440 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
@@ -1619,42 +1648,86 @@ static int domain_context_mapped(struct pci_dev *pdev)
1619 tmp->devfn); 1648 tmp->devfn);
1620} 1649}
1621 1650
1622static int 1651static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1623domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, 1652 struct scatterlist *sg, unsigned long phys_pfn,
1624 u64 hpa, size_t size, int prot) 1653 unsigned long nr_pages, int prot)
1625{ 1654{
1626 u64 start_pfn, end_pfn; 1655 struct dma_pte *first_pte = NULL, *pte = NULL;
1627 struct dma_pte *pte; 1656 phys_addr_t uninitialized_var(pteval);
1628 int index; 1657 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1629 int addr_width = agaw_to_width(domain->agaw); 1658 unsigned long sg_res;
1630 1659
1631 hpa &= (((u64)1) << addr_width) - 1; 1660 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1632 1661
1633 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) 1662 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1634 return -EINVAL; 1663 return -EINVAL;
1635 iova &= PAGE_MASK; 1664
1636 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT; 1665 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1637 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT; 1666
1638 index = 0; 1667 if (sg)
1639 while (start_pfn < end_pfn) { 1668 sg_res = 0;
1640 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index); 1669 else {
1641 if (!pte) 1670 sg_res = nr_pages + 1;
1642 return -ENOMEM; 1671 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1672 }
1673
1674 while (nr_pages--) {
1675 uint64_t tmp;
1676
1677 if (!sg_res) {
1678 sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;
1679 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1680 sg->dma_length = sg->length;
1681 pteval = page_to_phys(sg_page(sg)) | prot;
1682 }
1683 if (!pte) {
1684 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
1685 if (!pte)
1686 return -ENOMEM;
1687 }
1643 /* We don't need lock here, nobody else 1688 /* We don't need lock here, nobody else
1644 * touches the iova range 1689 * touches the iova range
1645 */ 1690 */
1646 BUG_ON(dma_pte_addr(pte)); 1691 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1647 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); 1692 if (tmp) {
1648 dma_set_pte_prot(pte, prot); 1693 static int dumps = 5;
1649 if (prot & DMA_PTE_SNP) 1694 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1650 dma_set_pte_snp(pte); 1695 iov_pfn, tmp, (unsigned long long)pteval);
1651 domain_flush_cache(domain, pte, sizeof(*pte)); 1696 if (dumps) {
1652 start_pfn++; 1697 dumps--;
1653 index++; 1698 debug_dma_dump_mappings(NULL);
1699 }
1700 WARN_ON(1);
1701 }
1702 pte++;
1703 if (!nr_pages || first_pte_in_page(pte)) {
1704 domain_flush_cache(domain, first_pte,
1705 (void *)pte - (void *)first_pte);
1706 pte = NULL;
1707 }
1708 iov_pfn++;
1709 pteval += VTD_PAGE_SIZE;
1710 sg_res--;
1711 if (!sg_res)
1712 sg = sg_next(sg);
1654 } 1713 }
1655 return 0; 1714 return 0;
1656} 1715}
1657 1716
1717static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1718 struct scatterlist *sg, unsigned long nr_pages,
1719 int prot)
1720{
1721 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1722}
1723
1724static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1725 unsigned long phys_pfn, unsigned long nr_pages,
1726 int prot)
1727{
1728 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1729}
1730
1658static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) 1731static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1659{ 1732{
1660 if (!iommu) 1733 if (!iommu)
@@ -1845,58 +1918,61 @@ error:
1845 1918
1846static int iommu_identity_mapping; 1919static int iommu_identity_mapping;
1847 1920
1921static int iommu_domain_identity_map(struct dmar_domain *domain,
1922 unsigned long long start,
1923 unsigned long long end)
1924{
1925 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
1926 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
1927
1928 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
1929 dma_to_mm_pfn(last_vpfn))) {
1930 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1931 return -ENOMEM;
1932 }
1933
1934 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1935 start, end, domain->id);
1936 /*
1937 * RMRR range might have overlap with physical memory range,
1938 * clear it first
1939 */
1940 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
1941
1942 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
1943 last_vpfn - first_vpfn + 1,
1944 DMA_PTE_READ|DMA_PTE_WRITE);
1945}
1946
1848static int iommu_prepare_identity_map(struct pci_dev *pdev, 1947static int iommu_prepare_identity_map(struct pci_dev *pdev,
1849 unsigned long long start, 1948 unsigned long long start,
1850 unsigned long long end) 1949 unsigned long long end)
1851{ 1950{
1852 struct dmar_domain *domain; 1951 struct dmar_domain *domain;
1853 unsigned long size;
1854 unsigned long long base;
1855 int ret; 1952 int ret;
1856 1953
1857 printk(KERN_INFO 1954 printk(KERN_INFO
1858 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", 1955 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1859 pci_name(pdev), start, end); 1956 pci_name(pdev), start, end);
1860 if (iommu_identity_mapping) 1957
1861 domain = si_domain; 1958 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1862 else
1863 /* page table init */
1864 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1865 if (!domain) 1959 if (!domain)
1866 return -ENOMEM; 1960 return -ENOMEM;
1867 1961
1868 /* The address might not be aligned */ 1962 ret = iommu_domain_identity_map(domain, start, end);
1869 base = start & PAGE_MASK;
1870 size = end - base;
1871 size = PAGE_ALIGN(size);
1872 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1873 IOVA_PFN(base + size) - 1)) {
1874 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1875 ret = -ENOMEM;
1876 goto error;
1877 }
1878
1879 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1880 size, base, pci_name(pdev));
1881 /*
1882 * RMRR range might have overlap with physical memory range,
1883 * clear it first
1884 */
1885 dma_pte_clear_range(domain, base, base + size);
1886
1887 ret = domain_page_mapping(domain, base, base, size,
1888 DMA_PTE_READ|DMA_PTE_WRITE);
1889 if (ret) 1963 if (ret)
1890 goto error; 1964 goto error;
1891 1965
1892 /* context entry init */ 1966 /* context entry init */
1893 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); 1967 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
1894 if (!ret) 1968 if (ret)
1895 return 0; 1969 goto error;
1896error: 1970
1971 return 0;
1972
1973 error:
1897 domain_exit(domain); 1974 domain_exit(domain);
1898 return ret; 1975 return ret;
1899
1900} 1976}
1901 1977
1902static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, 1978static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
@@ -1908,64 +1984,6 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1908 rmrr->end_address + 1); 1984 rmrr->end_address + 1);
1909} 1985}
1910 1986
1911#ifdef CONFIG_DMAR_GFX_WA
1912struct iommu_prepare_data {
1913 struct pci_dev *pdev;
1914 int ret;
1915};
1916
1917static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1918 unsigned long end_pfn, void *datax)
1919{
1920 struct iommu_prepare_data *data;
1921
1922 data = (struct iommu_prepare_data *)datax;
1923
1924 data->ret = iommu_prepare_identity_map(data->pdev,
1925 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1926 return data->ret;
1927
1928}
1929
1930static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1931{
1932 int nid;
1933 struct iommu_prepare_data data;
1934
1935 data.pdev = pdev;
1936 data.ret = 0;
1937
1938 for_each_online_node(nid) {
1939 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1940 if (data.ret)
1941 return data.ret;
1942 }
1943 return data.ret;
1944}
1945
1946static void __init iommu_prepare_gfx_mapping(void)
1947{
1948 struct pci_dev *pdev = NULL;
1949 int ret;
1950
1951 for_each_pci_dev(pdev) {
1952 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1953 !IS_GFX_DEVICE(pdev))
1954 continue;
1955 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1956 pci_name(pdev));
1957 ret = iommu_prepare_with_active_regions(pdev);
1958 if (ret)
1959 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1960 }
1961}
1962#else /* !CONFIG_DMAR_GFX_WA */
1963static inline void iommu_prepare_gfx_mapping(void)
1964{
1965 return;
1966}
1967#endif
1968
1969#ifdef CONFIG_DMAR_FLOPPY_WA 1987#ifdef CONFIG_DMAR_FLOPPY_WA
1970static inline void iommu_prepare_isa(void) 1988static inline void iommu_prepare_isa(void)
1971{ 1989{
@@ -1976,12 +1994,12 @@ static inline void iommu_prepare_isa(void)
1976 if (!pdev) 1994 if (!pdev)
1977 return; 1995 return;
1978 1996
1979 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n"); 1997 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
1980 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); 1998 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1981 1999
1982 if (ret) 2000 if (ret)
1983 printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, " 2001 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
1984 "floppy might not work\n"); 2002 "floppy might not work\n");
1985 2003
1986} 2004}
1987#else 2005#else
@@ -2009,16 +2027,30 @@ static int __init init_context_pass_through(void)
2009} 2027}
2010 2028
2011static int md_domain_init(struct dmar_domain *domain, int guest_width); 2029static int md_domain_init(struct dmar_domain *domain, int guest_width);
2030
2031static int __init si_domain_work_fn(unsigned long start_pfn,
2032 unsigned long end_pfn, void *datax)
2033{
2034 int *ret = datax;
2035
2036 *ret = iommu_domain_identity_map(si_domain,
2037 (uint64_t)start_pfn << PAGE_SHIFT,
2038 (uint64_t)end_pfn << PAGE_SHIFT);
2039 return *ret;
2040
2041}
2042
2012static int si_domain_init(void) 2043static int si_domain_init(void)
2013{ 2044{
2014 struct dmar_drhd_unit *drhd; 2045 struct dmar_drhd_unit *drhd;
2015 struct intel_iommu *iommu; 2046 struct intel_iommu *iommu;
2016 int ret = 0; 2047 int nid, ret = 0;
2017 2048
2018 si_domain = alloc_domain(); 2049 si_domain = alloc_domain();
2019 if (!si_domain) 2050 if (!si_domain)
2020 return -EFAULT; 2051 return -EFAULT;
2021 2052
2053 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2022 2054
2023 for_each_active_iommu(iommu, drhd) { 2055 for_each_active_iommu(iommu, drhd) {
2024 ret = iommu_attach_domain(si_domain, iommu); 2056 ret = iommu_attach_domain(si_domain, iommu);
@@ -2035,6 +2067,12 @@ static int si_domain_init(void)
2035 2067
2036 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; 2068 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2037 2069
2070 for_each_online_node(nid) {
2071 work_with_active_regions(nid, si_domain_work_fn, &ret);
2072 if (ret)
2073 return ret;
2074 }
2075
2038 return 0; 2076 return 0;
2039} 2077}
2040 2078
@@ -2081,7 +2119,6 @@ static int domain_add_dev_info(struct dmar_domain *domain,
2081 2119
2082static int iommu_prepare_static_identity_mapping(void) 2120static int iommu_prepare_static_identity_mapping(void)
2083{ 2121{
2084 int i;
2085 struct pci_dev *pdev = NULL; 2122 struct pci_dev *pdev = NULL;
2086 int ret; 2123 int ret;
2087 2124
@@ -2089,20 +2126,14 @@ static int iommu_prepare_static_identity_mapping(void)
2089 if (ret) 2126 if (ret)
2090 return -EFAULT; 2127 return -EFAULT;
2091 2128
2092 printk(KERN_INFO "IOMMU: Setting identity map:\n");
2093 for_each_pci_dev(pdev) { 2129 for_each_pci_dev(pdev) {
2094 for (i = 0; i < e820.nr_map; i++) { 2130 printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
2095 struct e820entry *ei = &e820.map[i]; 2131 pci_name(pdev));
2096 2132
2097 if (ei->type == E820_RAM) { 2133 ret = domain_context_mapping(si_domain, pdev,
2098 ret = iommu_prepare_identity_map(pdev, 2134 CONTEXT_TT_MULTI_LEVEL);
2099 ei->addr, ei->addr + ei->size); 2135 if (ret)
2100 if (ret) { 2136 return ret;
2101 printk(KERN_INFO "1:1 mapping to one domain failed.\n");
2102 return -EFAULT;
2103 }
2104 }
2105 }
2106 ret = domain_add_dev_info(si_domain, pdev); 2137 ret = domain_add_dev_info(si_domain, pdev);
2107 if (ret) 2138 if (ret)
2108 return ret; 2139 return ret;
@@ -2293,8 +2324,6 @@ int __init init_dmars(void)
2293 } 2324 }
2294 } 2325 }
2295 2326
2296 iommu_prepare_gfx_mapping();
2297
2298 iommu_prepare_isa(); 2327 iommu_prepare_isa();
2299 } 2328 }
2300 2329
@@ -2339,50 +2368,40 @@ error:
2339 return ret; 2368 return ret;
2340} 2369}
2341 2370
2342static inline u64 aligned_size(u64 host_addr, size_t size) 2371static inline unsigned long aligned_nrpages(unsigned long host_addr,
2343{ 2372 size_t size)
2344 u64 addr;
2345 addr = (host_addr & (~PAGE_MASK)) + size;
2346 return PAGE_ALIGN(addr);
2347}
2348
2349struct iova *
2350iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2351{ 2373{
2352 struct iova *piova; 2374 host_addr &= ~PAGE_MASK;
2375 host_addr += size + PAGE_SIZE - 1;
2353 2376
2354 /* Make sure it's in range */ 2377 return host_addr >> VTD_PAGE_SHIFT;
2355 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2356 if (!size || (IOVA_START_ADDR + size > end))
2357 return NULL;
2358
2359 piova = alloc_iova(&domain->iovad,
2360 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2361 return piova;
2362} 2378}
2363 2379
2364static struct iova * 2380static struct iova *intel_alloc_iova(struct device *dev,
2365__intel_alloc_iova(struct device *dev, struct dmar_domain *domain, 2381 struct dmar_domain *domain,
2366 size_t size, u64 dma_mask) 2382 unsigned long nrpages, uint64_t dma_mask)
2367{ 2383{
2368 struct pci_dev *pdev = to_pci_dev(dev); 2384 struct pci_dev *pdev = to_pci_dev(dev);
2369 struct iova *iova = NULL; 2385 struct iova *iova = NULL;
2370 2386
2371 if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac) 2387 /* Restrict dma_mask to the width that the iommu can handle */
2372 iova = iommu_alloc_iova(domain, size, dma_mask); 2388 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2373 else { 2389
2390 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2374 /* 2391 /*
2375 * First try to allocate an io virtual address in 2392 * First try to allocate an io virtual address in
2376 * DMA_BIT_MASK(32) and if that fails then try allocating 2393 * DMA_BIT_MASK(32) and if that fails then try allocating
2377 * from higher range 2394 * from higher range
2378 */ 2395 */
2379 iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32)); 2396 iova = alloc_iova(&domain->iovad, nrpages,
2380 if (!iova) 2397 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2381 iova = iommu_alloc_iova(domain, size, dma_mask); 2398 if (iova)
2382 } 2399 return iova;
2383 2400 }
2384 if (!iova) { 2401 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2385 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); 2402 if (unlikely(!iova)) {
2403 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2404 nrpages, pci_name(pdev));
2386 return NULL; 2405 return NULL;
2387 } 2406 }
2388 2407
@@ -2485,14 +2504,12 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2485 return 0; 2504 return 0;
2486 2505
2487 iommu = domain_get_iommu(domain); 2506 iommu = domain_get_iommu(domain);
2488 size = aligned_size((u64)paddr, size); 2507 size = aligned_nrpages(paddr, size);
2489 2508
2490 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2509 iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2491 if (!iova) 2510 if (!iova)
2492 goto error; 2511 goto error;
2493 2512
2494 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2495
2496 /* 2513 /*
2497 * Check if DMAR supports zero-length reads on write only 2514 * Check if DMAR supports zero-length reads on write only
2498 * mappings.. 2515 * mappings..
@@ -2508,20 +2525,20 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2508 * might have two guest_addr mapping to the same host paddr, but this 2525 * might have two guest_addr mapping to the same host paddr, but this
2509 * is not a big problem 2526 * is not a big problem
2510 */ 2527 */
2511 ret = domain_page_mapping(domain, start_paddr, 2528 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2512 ((u64)paddr) & PHYSICAL_PAGE_MASK, 2529 paddr >> VTD_PAGE_SHIFT, size, prot);
2513 size, prot);
2514 if (ret) 2530 if (ret)
2515 goto error; 2531 goto error;
2516 2532
2517 /* it's a non-present to present mapping. Only flush if caching mode */ 2533 /* it's a non-present to present mapping. Only flush if caching mode */
2518 if (cap_caching_mode(iommu->cap)) 2534 if (cap_caching_mode(iommu->cap))
2519 iommu_flush_iotlb_psi(iommu, 0, start_paddr, 2535 iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size);
2520 size >> VTD_PAGE_SHIFT);
2521 else 2536 else
2522 iommu_flush_write_buffer(iommu); 2537 iommu_flush_write_buffer(iommu);
2523 2538
2524 return start_paddr + ((u64)paddr & (~PAGE_MASK)); 2539 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2540 start_paddr += paddr & ~PAGE_MASK;
2541 return start_paddr;
2525 2542
2526error: 2543error:
2527 if (iova) 2544 if (iova)
@@ -2614,7 +2631,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2614{ 2631{
2615 struct pci_dev *pdev = to_pci_dev(dev); 2632 struct pci_dev *pdev = to_pci_dev(dev);
2616 struct dmar_domain *domain; 2633 struct dmar_domain *domain;
2617 unsigned long start_addr; 2634 unsigned long start_pfn, last_pfn;
2618 struct iova *iova; 2635 struct iova *iova;
2619 struct intel_iommu *iommu; 2636 struct intel_iommu *iommu;
2620 2637
@@ -2627,22 +2644,25 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2627 iommu = domain_get_iommu(domain); 2644 iommu = domain_get_iommu(domain);
2628 2645
2629 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); 2646 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2630 if (!iova) 2647 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2648 (unsigned long long)dev_addr))
2631 return; 2649 return;
2632 2650
2633 start_addr = iova->pfn_lo << PAGE_SHIFT; 2651 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2634 size = aligned_size((u64)dev_addr, size); 2652 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2635 2653
2636 pr_debug("Device %s unmapping: %zx@%llx\n", 2654 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2637 pci_name(pdev), size, (unsigned long long)start_addr); 2655 pci_name(pdev), start_pfn, last_pfn);
2638 2656
2639 /* clear the whole page */ 2657 /* clear the whole page */
2640 dma_pte_clear_range(domain, start_addr, start_addr + size); 2658 dma_pte_clear_range(domain, start_pfn, last_pfn);
2659
2641 /* free page tables */ 2660 /* free page tables */
2642 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2661 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2662
2643 if (intel_iommu_strict) { 2663 if (intel_iommu_strict) {
2644 iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2664 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2645 size >> VTD_PAGE_SHIFT); 2665 last_pfn - start_pfn + 1);
2646 /* free iova */ 2666 /* free iova */
2647 __free_iova(&domain->iovad, iova); 2667 __free_iova(&domain->iovad, iova);
2648 } else { 2668 } else {
@@ -2700,14 +2720,10 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2700 int nelems, enum dma_data_direction dir, 2720 int nelems, enum dma_data_direction dir,
2701 struct dma_attrs *attrs) 2721 struct dma_attrs *attrs)
2702{ 2722{
2703 int i;
2704 struct pci_dev *pdev = to_pci_dev(hwdev); 2723 struct pci_dev *pdev = to_pci_dev(hwdev);
2705 struct dmar_domain *domain; 2724 struct dmar_domain *domain;
2706 unsigned long start_addr; 2725 unsigned long start_pfn, last_pfn;
2707 struct iova *iova; 2726 struct iova *iova;
2708 size_t size = 0;
2709 phys_addr_t addr;
2710 struct scatterlist *sg;
2711 struct intel_iommu *iommu; 2727 struct intel_iommu *iommu;
2712 2728
2713 if (iommu_no_mapping(pdev)) 2729 if (iommu_no_mapping(pdev))
@@ -2719,22 +2735,21 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2719 iommu = domain_get_iommu(domain); 2735 iommu = domain_get_iommu(domain);
2720 2736
2721 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); 2737 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2722 if (!iova) 2738 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2739 (unsigned long long)sglist[0].dma_address))
2723 return; 2740 return;
2724 for_each_sg(sglist, sg, nelems, i) {
2725 addr = page_to_phys(sg_page(sg)) + sg->offset;
2726 size += aligned_size((u64)addr, sg->length);
2727 }
2728 2741
2729 start_addr = iova->pfn_lo << PAGE_SHIFT; 2742 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2743 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2730 2744
2731 /* clear the whole page */ 2745 /* clear the whole page */
2732 dma_pte_clear_range(domain, start_addr, start_addr + size); 2746 dma_pte_clear_range(domain, start_pfn, last_pfn);
2747
2733 /* free page tables */ 2748 /* free page tables */
2734 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2749 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2735 2750
2736 iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2751 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2737 size >> VTD_PAGE_SHIFT); 2752 (last_pfn - start_pfn + 1));
2738 2753
2739 /* free iova */ 2754 /* free iova */
2740 __free_iova(&domain->iovad, iova); 2755 __free_iova(&domain->iovad, iova);
@@ -2757,17 +2772,16 @@ static int intel_nontranslate_map_sg(struct device *hddev,
2757static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, 2772static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2758 enum dma_data_direction dir, struct dma_attrs *attrs) 2773 enum dma_data_direction dir, struct dma_attrs *attrs)
2759{ 2774{
2760 phys_addr_t addr;
2761 int i; 2775 int i;
2762 struct pci_dev *pdev = to_pci_dev(hwdev); 2776 struct pci_dev *pdev = to_pci_dev(hwdev);
2763 struct dmar_domain *domain; 2777 struct dmar_domain *domain;
2764 size_t size = 0; 2778 size_t size = 0;
2765 int prot = 0; 2779 int prot = 0;
2766 size_t offset = 0; 2780 size_t offset_pfn = 0;
2767 struct iova *iova = NULL; 2781 struct iova *iova = NULL;
2768 int ret; 2782 int ret;
2769 struct scatterlist *sg; 2783 struct scatterlist *sg;
2770 unsigned long start_addr; 2784 unsigned long start_vpfn;
2771 struct intel_iommu *iommu; 2785 struct intel_iommu *iommu;
2772 2786
2773 BUG_ON(dir == DMA_NONE); 2787 BUG_ON(dir == DMA_NONE);
@@ -2780,12 +2794,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2780 2794
2781 iommu = domain_get_iommu(domain); 2795 iommu = domain_get_iommu(domain);
2782 2796
2783 for_each_sg(sglist, sg, nelems, i) { 2797 for_each_sg(sglist, sg, nelems, i)
2784 addr = page_to_phys(sg_page(sg)) + sg->offset; 2798 size += aligned_nrpages(sg->offset, sg->length);
2785 size += aligned_size((u64)addr, sg->length);
2786 }
2787 2799
2788 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2800 iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2789 if (!iova) { 2801 if (!iova) {
2790 sglist->dma_length = 0; 2802 sglist->dma_length = 0;
2791 return 0; 2803 return 0;
@@ -2801,35 +2813,24 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2801 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2813 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2802 prot |= DMA_PTE_WRITE; 2814 prot |= DMA_PTE_WRITE;
2803 2815
2804 start_addr = iova->pfn_lo << PAGE_SHIFT; 2816 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
2805 offset = 0; 2817
2806 for_each_sg(sglist, sg, nelems, i) { 2818 ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot);
2807 addr = page_to_phys(sg_page(sg)) + sg->offset; 2819 if (unlikely(ret)) {
2808 size = aligned_size((u64)addr, sg->length); 2820 /* clear the page */
2809 ret = domain_page_mapping(domain, start_addr + offset, 2821 dma_pte_clear_range(domain, start_vpfn,
2810 ((u64)addr) & PHYSICAL_PAGE_MASK, 2822 start_vpfn + size - 1);
2811 size, prot); 2823 /* free page tables */
2812 if (ret) { 2824 dma_pte_free_pagetable(domain, start_vpfn,
2813 /* clear the page */ 2825 start_vpfn + size - 1);
2814 dma_pte_clear_range(domain, start_addr, 2826 /* free iova */
2815 start_addr + offset); 2827 __free_iova(&domain->iovad, iova);
2816 /* free page tables */ 2828 return 0;
2817 dma_pte_free_pagetable(domain, start_addr,
2818 start_addr + offset);
2819 /* free iova */
2820 __free_iova(&domain->iovad, iova);
2821 return 0;
2822 }
2823 sg->dma_address = start_addr + offset +
2824 ((u64)addr & (~PAGE_MASK));
2825 sg->dma_length = sg->length;
2826 offset += size;
2827 } 2829 }
2828 2830
2829 /* it's a non-present to present mapping. Only flush if caching mode */ 2831 /* it's a non-present to present mapping. Only flush if caching mode */
2830 if (cap_caching_mode(iommu->cap)) 2832 if (cap_caching_mode(iommu->cap))
2831 iommu_flush_iotlb_psi(iommu, 0, start_addr, 2833 iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn);
2832 offset >> VTD_PAGE_SHIFT);
2833 else 2834 else
2834 iommu_flush_write_buffer(iommu); 2835 iommu_flush_write_buffer(iommu);
2835 2836
@@ -3334,7 +3335,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
3334 int adjust_width; 3335 int adjust_width;
3335 3336
3336 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 3337 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3337 spin_lock_init(&domain->mapping_lock);
3338 spin_lock_init(&domain->iommu_lock); 3338 spin_lock_init(&domain->iommu_lock);
3339 3339
3340 domain_reserve_special_ranges(domain); 3340 domain_reserve_special_ranges(domain);
@@ -3388,8 +3388,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain)
3388 3388
3389static void vm_domain_exit(struct dmar_domain *domain) 3389static void vm_domain_exit(struct dmar_domain *domain)
3390{ 3390{
3391 u64 end;
3392
3393 /* Domain 0 is reserved, so dont process it */ 3391 /* Domain 0 is reserved, so dont process it */
3394 if (!domain) 3392 if (!domain)
3395 return; 3393 return;
@@ -3397,14 +3395,12 @@ static void vm_domain_exit(struct dmar_domain *domain)
3397 vm_domain_remove_all_dev_info(domain); 3395 vm_domain_remove_all_dev_info(domain);
3398 /* destroy iovas */ 3396 /* destroy iovas */
3399 put_iova_domain(&domain->iovad); 3397 put_iova_domain(&domain->iovad);
3400 end = DOMAIN_MAX_ADDR(domain->gaw);
3401 end = end & (~VTD_PAGE_MASK);
3402 3398
3403 /* clear ptes */ 3399 /* clear ptes */
3404 dma_pte_clear_range(domain, 0, end); 3400 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3405 3401
3406 /* free page tables */ 3402 /* free page tables */
3407 dma_pte_free_pagetable(domain, 0, end); 3403 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3408 3404
3409 iommu_free_vm_domain(domain); 3405 iommu_free_vm_domain(domain);
3410 free_domain_mem(domain); 3406 free_domain_mem(domain);
@@ -3513,7 +3509,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3513 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) 3509 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3514 prot |= DMA_PTE_SNP; 3510 prot |= DMA_PTE_SNP;
3515 3511
3516 max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); 3512 max_addr = iova + size;
3517 if (dmar_domain->max_addr < max_addr) { 3513 if (dmar_domain->max_addr < max_addr) {
3518 int min_agaw; 3514 int min_agaw;
3519 u64 end; 3515 u64 end;
@@ -3531,8 +3527,11 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3531 } 3527 }
3532 dmar_domain->max_addr = max_addr; 3528 dmar_domain->max_addr = max_addr;
3533 } 3529 }
3534 3530 /* Round up size to next multiple of PAGE_SIZE, if it and
3535 ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); 3531 the low bits of hpa would take us onto the next page */
3532 size = aligned_nrpages(hpa, size);
3533 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3534 hpa >> VTD_PAGE_SHIFT, size, prot);
3536 return ret; 3535 return ret;
3537} 3536}
3538 3537
@@ -3540,15 +3539,12 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain,
3540 unsigned long iova, size_t size) 3539 unsigned long iova, size_t size)
3541{ 3540{
3542 struct dmar_domain *dmar_domain = domain->priv; 3541 struct dmar_domain *dmar_domain = domain->priv;
3543 dma_addr_t base;
3544 3542
3545 /* The address might not be aligned */ 3543 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3546 base = iova & VTD_PAGE_MASK; 3544 (iova + size - 1) >> VTD_PAGE_SHIFT);
3547 size = VTD_PAGE_ALIGN(size);
3548 dma_pte_clear_range(dmar_domain, base, base + size);
3549 3545
3550 if (dmar_domain->max_addr == base + size) 3546 if (dmar_domain->max_addr == iova + size)
3551 dmar_domain->max_addr = base; 3547 dmar_domain->max_addr = iova;
3552} 3548}
3553 3549
3554static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 3550static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -3558,7 +3554,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3558 struct dma_pte *pte; 3554 struct dma_pte *pte;
3559 u64 phys = 0; 3555 u64 phys = 0;
3560 3556
3561 pte = addr_to_dma_pte(dmar_domain, iova); 3557 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
3562 if (pte) 3558 if (pte)
3563 phys = dma_pte_addr(pte); 3559 phys = dma_pte_addr(pte);
3564 3560