Diffstat (limited to 'drivers/pci/intel-iommu.c')
-rw-r--r--  drivers/pci/intel-iommu.c | 800
1 file changed, 430 insertions(+), 370 deletions(-)
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index e53eacd75c8d..2314ad7ee5fe 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -39,7 +39,6 @@
39#include <linux/sysdev.h> 39#include <linux/sysdev.h>
40#include <asm/cacheflush.h> 40#include <asm/cacheflush.h>
41#include <asm/iommu.h> 41#include <asm/iommu.h>
42#include <asm/e820.h>
43#include "pci.h" 42#include "pci.h"
44 43
45#define ROOT_SIZE VTD_PAGE_SIZE 44#define ROOT_SIZE VTD_PAGE_SIZE
@@ -57,14 +56,32 @@
57#define MAX_AGAW_WIDTH 64 56#define MAX_AGAW_WIDTH 64
58 57
59#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 58#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
59#define DOMAIN_MAX_PFN(gaw) ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
60 60
61#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 61#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
62#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) 62#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
63#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) 63#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
64 64
65#ifndef PHYSICAL_PAGE_MASK 65
66#define PHYSICAL_PAGE_MASK PAGE_MASK 66/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
67#endif 67 are never going to work. */
68static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
69{
70 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
71}
72
73static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
74{
75 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
76}
77static inline unsigned long page_to_dma_pfn(struct page *pg)
78{
79 return mm_to_dma_pfn(page_to_pfn(pg));
80}
81static inline unsigned long virt_to_dma_pfn(void *p)
82{
83 return page_to_dma_pfn(virt_to_page(p));
84}
68 85
69/* global iommu list, set NULL for ignored DMAR units */ 86/* global iommu list, set NULL for ignored DMAR units */
70static struct intel_iommu **g_iommus; 87static struct intel_iommu **g_iommus;
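
The new helpers above convert between MM page frame numbers (PAGE_SHIFT-sized) and VT-d page frame numbers (VTD_PAGE_SHIFT-sized). A minimal standalone sketch, not from this patch, of how that round trip behaves; the 64KiB MM page shift is a hypothetical value chosen only to make the shift visible (on x86 both shifts are 12 and the conversion is a no-op):

#include <assert.h>
#include <stdio.h>

#define VTD_PAGE_SHIFT 12          /* VT-d always uses 4KiB pages */
#define MM_PAGE_SHIFT  16          /* hypothetical 64KiB kernel pages */

static unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
        return dma_pfn >> (MM_PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
        return mm_pfn << (MM_PAGE_SHIFT - VTD_PAGE_SHIFT);
}

int main(void)
{
        /* One 64KiB MM page covers sixteen 4KiB VT-d pages. */
        assert(mm_to_dma_pfn(1) == 16);
        /* Converting back always lands on the containing MM page. */
        assert(dma_to_mm_pfn(16) == 1 && dma_to_mm_pfn(31) == 1);
        printf("mm pfn 1 -> dma pfn %lu\n", mm_to_dma_pfn(1));
        return 0;
}
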
@@ -205,12 +222,17 @@ static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
205 222
206static inline u64 dma_pte_addr(struct dma_pte *pte) 223static inline u64 dma_pte_addr(struct dma_pte *pte)
207{ 224{
208 return (pte->val & VTD_PAGE_MASK); 225#ifdef CONFIG_64BIT
226 return pte->val & VTD_PAGE_MASK;
227#else
228 /* Must have a full atomic 64-bit read */
229 return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
230#endif
209} 231}
210 232
211static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr) 233static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
212{ 234{
213 pte->val |= (addr & VTD_PAGE_MASK); 235 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
214} 236}
215 237
216static inline bool dma_pte_present(struct dma_pte *pte) 238static inline bool dma_pte_present(struct dma_pte *pte)
@@ -218,6 +240,11 @@ static inline bool dma_pte_present(struct dma_pte *pte)
218 return (pte->val & 3) != 0; 240 return (pte->val & 3) != 0;
219} 241}
220 242
243static inline int first_pte_in_page(struct dma_pte *pte)
244{
245 return !((unsigned long)pte & ~VTD_PAGE_MASK);
246}
247
221/* 248/*
222 * This domain is a statically identity mapping domain. 249 * This domain is a statically identity mapping domain.
223 * 1. This domain creats a static 1:1 mapping to all usable memory. 250 * 1. This domain creats a static 1:1 mapping to all usable memory.
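
first_pte_in_page() in the hunk above is a pointer-alignment test: with 8-byte dma_pte entries, a 4KiB page of page-table memory holds 512 of them, and the test fires exactly on entry 0 of each table page; the new clear/map loops use it to flush one table page's worth of PTEs at a time. A standalone check of that reading (hypothetical addresses, mask as in the driver):

#include <assert.h>
#include <stdint.h>

#define VTD_PAGE_SHIFT  12
#define VTD_PAGE_MASK   (((uint64_t)-1) << VTD_PAGE_SHIFT)

/* True only for the first 8-byte PTE slot in each 4KiB table page. */
static int first_pte_in_page(uintptr_t pte_addr)
{
        return !(pte_addr & ~VTD_PAGE_MASK);
}

int main(void)
{
        uintptr_t table = 0x1000;       /* hypothetical page-aligned table address */

        assert(first_pte_in_page(table));               /* entry 0 */
        assert(!first_pte_in_page(table + 8));          /* entry 1, 8 bytes in */
        assert(first_pte_in_page(table + 512 * 8));     /* entry 512 starts the next table page */
        return 0;
}
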
@@ -245,7 +272,6 @@ struct dmar_domain {
245 struct iova_domain iovad; /* iova's that belong to this domain */ 272 struct iova_domain iovad; /* iova's that belong to this domain */
246 273
247 struct dma_pte *pgd; /* virtual address */ 274 struct dma_pte *pgd; /* virtual address */
248 spinlock_t mapping_lock; /* page table lock */
249 int gaw; /* max guest address width */ 275 int gaw; /* max guest address width */
250 276
251 /* adjusted guest address width, 0 is level 2 30-bit */ 277 /* adjusted guest address width, 0 is level 2 30-bit */
@@ -649,80 +675,78 @@ static inline int width_to_agaw(int width)
649 675
650static inline unsigned int level_to_offset_bits(int level) 676static inline unsigned int level_to_offset_bits(int level)
651{ 677{
652 return (12 + (level - 1) * LEVEL_STRIDE); 678 return (level - 1) * LEVEL_STRIDE;
653} 679}
654 680
655static inline int address_level_offset(u64 addr, int level) 681static inline int pfn_level_offset(unsigned long pfn, int level)
656{ 682{
657 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK); 683 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
658} 684}
659 685
660static inline u64 level_mask(int level) 686static inline unsigned long level_mask(int level)
661{ 687{
662 return ((u64)-1 << level_to_offset_bits(level)); 688 return -1UL << level_to_offset_bits(level);
663} 689}
664 690
665static inline u64 level_size(int level) 691static inline unsigned long level_size(int level)
666{ 692{
667 return ((u64)1 << level_to_offset_bits(level)); 693 return 1UL << level_to_offset_bits(level);
668} 694}
669 695
670static inline u64 align_to_level(u64 addr, int level) 696static inline unsigned long align_to_level(unsigned long pfn, int level)
671{ 697{
672 return ((addr + level_size(level) - 1) & level_mask(level)); 698 return (pfn + level_size(level) - 1) & level_mask(level);
673} 699}
674 700
675static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) 701static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
702 unsigned long pfn)
676{ 703{
677 int addr_width = agaw_to_width(domain->agaw); 704 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
678 struct dma_pte *parent, *pte = NULL; 705 struct dma_pte *parent, *pte = NULL;
679 int level = agaw_to_level(domain->agaw); 706 int level = agaw_to_level(domain->agaw);
680 int offset; 707 int offset;
681 unsigned long flags;
682 708
683 BUG_ON(!domain->pgd); 709 BUG_ON(!domain->pgd);
684 710 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
685 addr &= (((u64)1) << addr_width) - 1;
686 parent = domain->pgd; 711 parent = domain->pgd;
687 712
688 spin_lock_irqsave(&domain->mapping_lock, flags);
689 while (level > 0) { 713 while (level > 0) {
690 void *tmp_page; 714 void *tmp_page;
691 715
692 offset = address_level_offset(addr, level); 716 offset = pfn_level_offset(pfn, level);
693 pte = &parent[offset]; 717 pte = &parent[offset];
694 if (level == 1) 718 if (level == 1)
695 break; 719 break;
696 720
697 if (!dma_pte_present(pte)) { 721 if (!dma_pte_present(pte)) {
722 uint64_t pteval;
723
698 tmp_page = alloc_pgtable_page(); 724 tmp_page = alloc_pgtable_page();
699 725
700 if (!tmp_page) { 726 if (!tmp_page)
701 spin_unlock_irqrestore(&domain->mapping_lock,
702 flags);
703 return NULL; 727 return NULL;
728
729 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
730 pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
731 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
732 /* Someone else set it while we were thinking; use theirs. */
733 free_pgtable_page(tmp_page);
734 } else {
735 dma_pte_addr(pte);
736 domain_flush_cache(domain, pte, sizeof(*pte));
704 } 737 }
705 domain_flush_cache(domain, tmp_page, PAGE_SIZE);
706 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
707 /*
708 * high level table always sets r/w, last level page
709 * table control read/write
710 */
711 dma_set_pte_readable(pte);
712 dma_set_pte_writable(pte);
713 domain_flush_cache(domain, pte, sizeof(*pte));
714 } 738 }
715 parent = phys_to_virt(dma_pte_addr(pte)); 739 parent = phys_to_virt(dma_pte_addr(pte));
716 level--; 740 level--;
717 } 741 }
718 742
719 spin_unlock_irqrestore(&domain->mapping_lock, flags);
720 return pte; 743 return pte;
721} 744}
722 745
723/* return address's pte at specific level */ 746/* return address's pte at specific level */
724static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, 747static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
725 int level) 748 unsigned long pfn,
749 int level)
726{ 750{
727 struct dma_pte *parent, *pte = NULL; 751 struct dma_pte *parent, *pte = NULL;
728 int total = agaw_to_level(domain->agaw); 752 int total = agaw_to_level(domain->agaw);
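
The rewritten pfn_to_dma_pte() above drops domain->mapping_lock and instead populates a missing table level with an atomic compare-and-exchange: allocate a page, try to install it with cmpxchg64(), and free it if another CPU won the race. A reduced userspace sketch of that idiom, not the driver code itself, using the GCC/Clang __sync builtin in place of the kernel's cmpxchg64():

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static uint64_t pte_val;        /* one shared "PTE" slot, initially empty (0) */

/* Allocate a table page and try to install it; free ours if we lose the race. */
static void *install_table_once(void)
{
        void *new_page = aligned_alloc(4096, 4096);
        uint64_t pteval;

        if (!new_page)
                return NULL;
        pteval = (uint64_t)(uintptr_t)new_page | 0x3;   /* read/write bits, as in the patch */

        if (!__sync_bool_compare_and_swap(&pte_val, 0, pteval)) {
                /* Someone else set it while we were thinking; use theirs. */
                free(new_page);
        }
        return (void *)(uintptr_t)(pte_val & ~0xfffULL);
}

int main(void)
{
        void *a = install_table_once();
        void *b = install_table_once();  /* the second caller reuses the first table */

        printf("same table: %s\n", a == b ? "yes" : "no");
        return 0;
}
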
@@ -730,7 +754,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
730 754
731 parent = domain->pgd; 755 parent = domain->pgd;
732 while (level <= total) { 756 while (level <= total) {
733 offset = address_level_offset(addr, total); 757 offset = pfn_level_offset(pfn, total);
734 pte = &parent[offset]; 758 pte = &parent[offset];
735 if (level == total) 759 if (level == total)
736 return pte; 760 return pte;
@@ -743,74 +767,82 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
743 return NULL; 767 return NULL;
744} 768}
745 769
746/* clear one page's page table */
747static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
748{
749 struct dma_pte *pte = NULL;
750
751 /* get last level pte */
752 pte = dma_addr_level_pte(domain, addr, 1);
753
754 if (pte) {
755 dma_clear_pte(pte);
756 domain_flush_cache(domain, pte, sizeof(*pte));
757 }
758}
759
760/* clear last level pte, a tlb flush should be followed */ 770/* clear last level pte, a tlb flush should be followed */
761static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) 771static void dma_pte_clear_range(struct dmar_domain *domain,
772 unsigned long start_pfn,
773 unsigned long last_pfn)
762{ 774{
763 int addr_width = agaw_to_width(domain->agaw); 775 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
764 int npages; 776 struct dma_pte *first_pte, *pte;
765 777
766 start &= (((u64)1) << addr_width) - 1; 778 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
767 end &= (((u64)1) << addr_width) - 1; 779 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
768 /* in case it's partial page */
769 start &= PAGE_MASK;
770 end = PAGE_ALIGN(end);
771 npages = (end - start) / VTD_PAGE_SIZE;
772 780
773 /* we don't need lock here, nobody else touches the iova range */ 781 /* we don't need lock here; nobody else touches the iova range */
774 while (npages--) { 782 while (start_pfn <= last_pfn) {
775 dma_pte_clear_one(domain, start); 783 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
776 start += VTD_PAGE_SIZE; 784 if (!pte) {
785 start_pfn = align_to_level(start_pfn + 1, 2);
786 continue;
787 }
788 do {
789 dma_clear_pte(pte);
790 start_pfn++;
791 pte++;
792 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
793
794 domain_flush_cache(domain, first_pte,
795 (void *)pte - (void *)first_pte);
777 } 796 }
778} 797}
779 798
780/* free page table pages. last level pte should already be cleared */ 799/* free page table pages. last level pte should already be cleared */
781static void dma_pte_free_pagetable(struct dmar_domain *domain, 800static void dma_pte_free_pagetable(struct dmar_domain *domain,
782 u64 start, u64 end) 801 unsigned long start_pfn,
802 unsigned long last_pfn)
783{ 803{
784 int addr_width = agaw_to_width(domain->agaw); 804 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
785 struct dma_pte *pte; 805 struct dma_pte *first_pte, *pte;
786 int total = agaw_to_level(domain->agaw); 806 int total = agaw_to_level(domain->agaw);
787 int level; 807 int level;
788 u64 tmp; 808 unsigned long tmp;
789 809
790 start &= (((u64)1) << addr_width) - 1; 810 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
791 end &= (((u64)1) << addr_width) - 1; 811 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
792 812
793 /* we don't need lock here, nobody else touches the iova range */ 813 /* We don't need lock here; nobody else touches the iova range */
794 level = 2; 814 level = 2;
795 while (level <= total) { 815 while (level <= total) {
796 tmp = align_to_level(start, level); 816 tmp = align_to_level(start_pfn, level);
797 if (tmp >= end || (tmp + level_size(level) > end)) 817
818 /* If we can't even clear one PTE at this level, we're done */
819 if (tmp + level_size(level) - 1 > last_pfn)
798 return; 820 return;
799 821
800 while (tmp < end) { 822 while (tmp + level_size(level) - 1 <= last_pfn) {
801 pte = dma_addr_level_pte(domain, tmp, level); 823 first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
802 if (pte) { 824 if (!pte) {
803 free_pgtable_page( 825 tmp = align_to_level(tmp + 1, level + 1);
804 phys_to_virt(dma_pte_addr(pte))); 826 continue;
805 dma_clear_pte(pte);
806 domain_flush_cache(domain, pte, sizeof(*pte));
807 } 827 }
808 tmp += level_size(level); 828 do {
829 if (dma_pte_present(pte)) {
830 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
831 dma_clear_pte(pte);
832 }
833 pte++;
834 tmp += level_size(level);
835 } while (!first_pte_in_page(pte) &&
836 tmp + level_size(level) - 1 <= last_pfn);
837
838 domain_flush_cache(domain, first_pte,
839 (void *)pte - (void *)first_pte);
840
809 } 841 }
810 level++; 842 level++;
811 } 843 }
812 /* free pgd */ 844 /* free pgd */
813 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) { 845 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
814 free_pgtable_page(domain->pgd); 846 free_pgtable_page(domain->pgd);
815 domain->pgd = NULL; 847 domain->pgd = NULL;
816 } 848 }
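
dma_pte_free_pagetable() above only frees a page-table page when [start_pfn, last_pfn] covers it completely; the covering test is plain arithmetic on level_size()/align_to_level(). A small worked example of those helpers, assuming LEVEL_STRIDE is 9 (512 entries per 4KiB table page):

#include <stdio.h>

#define LEVEL_STRIDE 9  /* assumed: 512 PTEs per table page */

static unsigned int level_to_offset_bits(int level) { return (level - 1) * LEVEL_STRIDE; }
static unsigned long level_size(int level) { return 1UL << level_to_offset_bits(level); }
static unsigned long level_mask(int level) { return -1UL << level_to_offset_bits(level); }
static unsigned long align_to_level(unsigned long pfn, int level)
{
        return (pfn + level_size(level) - 1) & level_mask(level);
}

int main(void)
{
        /* A level-2 entry spans 512 VT-d pfns. */
        printf("level_size(2) = %lu pfns\n", level_size(2));

        /* Freeing tables for pfns 100..1500: the first fully covered level-2
         * block starts at align_to_level(100, 2) = 512 and ends at 1023, so
         * only that level-1 table can be freed; blocks 0..511 and 1024..1535
         * are only partially covered and must stay. */
        unsigned long tmp = align_to_level(100, 2);
        printf("first candidate block: %lu..%lu\n", tmp, tmp + level_size(2) - 1);
        return 0;
}
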
@@ -1036,11 +1068,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1036} 1068}
1037 1069
1038static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, 1070static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1039 u64 addr, unsigned int pages) 1071 unsigned long pfn, unsigned int pages)
1040{ 1072{
1041 unsigned int mask = ilog2(__roundup_pow_of_two(pages)); 1073 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1074 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1042 1075
1043 BUG_ON(addr & (~VTD_PAGE_MASK));
1044 BUG_ON(pages == 0); 1076 BUG_ON(pages == 0);
1045 1077
1046 /* 1078 /*
@@ -1055,7 +1087,12 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1055 else 1087 else
1056 iommu->flush.flush_iotlb(iommu, did, addr, mask, 1088 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1057 DMA_TLB_PSI_FLUSH); 1089 DMA_TLB_PSI_FLUSH);
1058 if (did) 1090
1091 /*
1092 * In caching mode, domain ID 0 is reserved for non-present to present
1093 * mapping flush. Device IOTLB doesn't need to be flushed in this case.
1094 */
1095 if (!cap_caching_mode(iommu->cap) || did)
1059 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); 1096 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1060} 1097}
1061 1098
@@ -1280,7 +1317,6 @@ static void dmar_init_reserved_ranges(void)
1280 struct pci_dev *pdev = NULL; 1317 struct pci_dev *pdev = NULL;
1281 struct iova *iova; 1318 struct iova *iova;
1282 int i; 1319 int i;
1283 u64 addr, size;
1284 1320
1285 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); 1321 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1286 1322
@@ -1303,12 +1339,9 @@ static void dmar_init_reserved_ranges(void)
1303 r = &pdev->resource[i]; 1339 r = &pdev->resource[i];
1304 if (!r->flags || !(r->flags & IORESOURCE_MEM)) 1340 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1305 continue; 1341 continue;
1306 addr = r->start; 1342 iova = reserve_iova(&reserved_iova_list,
1307 addr &= PHYSICAL_PAGE_MASK; 1343 IOVA_PFN(r->start),
1308 size = r->end - addr; 1344 IOVA_PFN(r->end));
1309 size = PAGE_ALIGN(size);
1310 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1311 IOVA_PFN(size + addr) - 1);
1312 if (!iova) 1345 if (!iova)
1313 printk(KERN_ERR "Reserve iova failed\n"); 1346 printk(KERN_ERR "Reserve iova failed\n");
1314 } 1347 }
@@ -1342,7 +1375,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1342 unsigned long sagaw; 1375 unsigned long sagaw;
1343 1376
1344 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 1377 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1345 spin_lock_init(&domain->mapping_lock);
1346 spin_lock_init(&domain->iommu_lock); 1378 spin_lock_init(&domain->iommu_lock);
1347 1379
1348 domain_reserve_special_ranges(domain); 1380 domain_reserve_special_ranges(domain);
@@ -1389,7 +1421,6 @@ static void domain_exit(struct dmar_domain *domain)
1389{ 1421{
1390 struct dmar_drhd_unit *drhd; 1422 struct dmar_drhd_unit *drhd;
1391 struct intel_iommu *iommu; 1423 struct intel_iommu *iommu;
1392 u64 end;
1393 1424
1394 /* Domain 0 is reserved, so dont process it */ 1425 /* Domain 0 is reserved, so dont process it */
1395 if (!domain) 1426 if (!domain)
@@ -1398,14 +1429,12 @@ static void domain_exit(struct dmar_domain *domain)
1398 domain_remove_dev_info(domain); 1429 domain_remove_dev_info(domain);
1399 /* destroy iovas */ 1430 /* destroy iovas */
1400 put_iova_domain(&domain->iovad); 1431 put_iova_domain(&domain->iovad);
1401 end = DOMAIN_MAX_ADDR(domain->gaw);
1402 end = end & (~PAGE_MASK);
1403 1432
1404 /* clear ptes */ 1433 /* clear ptes */
1405 dma_pte_clear_range(domain, 0, end); 1434 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1406 1435
1407 /* free page tables */ 1436 /* free page tables */
1408 dma_pte_free_pagetable(domain, 0, end); 1437 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1409 1438
1410 for_each_active_iommu(iommu, drhd) 1439 for_each_active_iommu(iommu, drhd)
1411 if (test_bit(iommu->seq_id, &domain->iommu_bmp)) 1440 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
@@ -1476,7 +1505,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1476 } 1505 }
1477 1506
1478 set_bit(num, iommu->domain_ids); 1507 set_bit(num, iommu->domain_ids);
1479 set_bit(iommu->seq_id, &domain->iommu_bmp);
1480 iommu->domains[num] = domain; 1508 iommu->domains[num] = domain;
1481 id = num; 1509 id = num;
1482 } 1510 }
@@ -1619,42 +1647,94 @@ static int domain_context_mapped(struct pci_dev *pdev)
1619 tmp->devfn); 1647 tmp->devfn);
1620} 1648}
1621 1649
1622static int 1650/* Returns a number of VTD pages, but aligned to MM page size */
1623domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, 1651static inline unsigned long aligned_nrpages(unsigned long host_addr,
1624 u64 hpa, size_t size, int prot) 1652 size_t size)
1625{ 1653{
1626 u64 start_pfn, end_pfn; 1654 host_addr &= ~PAGE_MASK;
1627 struct dma_pte *pte; 1655 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1628 int index; 1656}
1629 int addr_width = agaw_to_width(domain->agaw);
1630 1657
1631 hpa &= (((u64)1) << addr_width) - 1; 1658static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1659 struct scatterlist *sg, unsigned long phys_pfn,
1660 unsigned long nr_pages, int prot)
1661{
1662 struct dma_pte *first_pte = NULL, *pte = NULL;
1663 phys_addr_t uninitialized_var(pteval);
1664 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1665 unsigned long sg_res;
1666
1667 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1632 1668
1633 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) 1669 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1634 return -EINVAL; 1670 return -EINVAL;
1635 iova &= PAGE_MASK; 1671
1636 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT; 1672 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1637 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT; 1673
1638 index = 0; 1674 if (sg)
1639 while (start_pfn < end_pfn) { 1675 sg_res = 0;
1640 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index); 1676 else {
1641 if (!pte) 1677 sg_res = nr_pages + 1;
1642 return -ENOMEM; 1678 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1679 }
1680
1681 while (nr_pages--) {
1682 uint64_t tmp;
1683
1684 if (!sg_res) {
1685 sg_res = aligned_nrpages(sg->offset, sg->length);
1686 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1687 sg->dma_length = sg->length;
1688 pteval = page_to_phys(sg_page(sg)) | prot;
1689 }
1690 if (!pte) {
1691 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
1692 if (!pte)
1693 return -ENOMEM;
1694 }
1643 /* We don't need lock here, nobody else 1695 /* We don't need lock here, nobody else
1644 * touches the iova range 1696 * touches the iova range
1645 */ 1697 */
1646 BUG_ON(dma_pte_addr(pte)); 1698 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1647 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); 1699 if (tmp) {
1648 dma_set_pte_prot(pte, prot); 1700 static int dumps = 5;
1649 if (prot & DMA_PTE_SNP) 1701 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1650 dma_set_pte_snp(pte); 1702 iov_pfn, tmp, (unsigned long long)pteval);
1651 domain_flush_cache(domain, pte, sizeof(*pte)); 1703 if (dumps) {
1652 start_pfn++; 1704 dumps--;
1653 index++; 1705 debug_dma_dump_mappings(NULL);
1706 }
1707 WARN_ON(1);
1708 }
1709 pte++;
1710 if (!nr_pages || first_pte_in_page(pte)) {
1711 domain_flush_cache(domain, first_pte,
1712 (void *)pte - (void *)first_pte);
1713 pte = NULL;
1714 }
1715 iov_pfn++;
1716 pteval += VTD_PAGE_SIZE;
1717 sg_res--;
1718 if (!sg_res)
1719 sg = sg_next(sg);
1654 } 1720 }
1655 return 0; 1721 return 0;
1656} 1722}
1657 1723
1724static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1725 struct scatterlist *sg, unsigned long nr_pages,
1726 int prot)
1727{
1728 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1729}
1730
1731static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1732 unsigned long phys_pfn, unsigned long nr_pages,
1733 int prot)
1734{
1735 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1736}
1737
1658static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) 1738static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1659{ 1739{
1660 if (!iommu) 1740 if (!iommu)
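
aligned_nrpages() introduced above rounds a (host offset, size) pair up to whole MM pages and reports the count in VT-d page units. A quick standalone check, assuming the usual x86 case where both page sizes are 4KiB (so the VT-d count equals the MM page count):

#include <stdio.h>

#define PAGE_SHIFT     12
#define PAGE_SIZE      (1UL << PAGE_SHIFT)
#define PAGE_MASK      (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)  (((x) + PAGE_SIZE - 1) & PAGE_MASK)
#define VTD_PAGE_SHIFT 12

static unsigned long aligned_nrpages(unsigned long host_addr, unsigned long size)
{
        host_addr &= ~PAGE_MASK;        /* keep only the in-page offset */
        return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
}

int main(void)
{
        /* 0x2000 bytes starting 0x234 into a page straddles three pages. */
        printf("%lu\n", aligned_nrpages(0x1234, 0x2000));      /* prints 3 */
        /* A page-aligned single page is exactly one. */
        printf("%lu\n", aligned_nrpages(0x1000, 0x1000));       /* prints 1 */
        return 0;
}
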
@@ -1845,58 +1925,61 @@ error:
1845 1925
1846static int iommu_identity_mapping; 1926static int iommu_identity_mapping;
1847 1927
1928static int iommu_domain_identity_map(struct dmar_domain *domain,
1929 unsigned long long start,
1930 unsigned long long end)
1931{
1932 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
1933 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
1934
1935 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
1936 dma_to_mm_pfn(last_vpfn))) {
1937 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1938 return -ENOMEM;
1939 }
1940
1941 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1942 start, end, domain->id);
1943 /*
1944 * RMRR range might have overlap with physical memory range,
1945 * clear it first
1946 */
1947 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
1948
1949 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
1950 last_vpfn - first_vpfn + 1,
1951 DMA_PTE_READ|DMA_PTE_WRITE);
1952}
1953
1848static int iommu_prepare_identity_map(struct pci_dev *pdev, 1954static int iommu_prepare_identity_map(struct pci_dev *pdev,
1849 unsigned long long start, 1955 unsigned long long start,
1850 unsigned long long end) 1956 unsigned long long end)
1851{ 1957{
1852 struct dmar_domain *domain; 1958 struct dmar_domain *domain;
1853 unsigned long size;
1854 unsigned long long base;
1855 int ret; 1959 int ret;
1856 1960
1857 printk(KERN_INFO 1961 printk(KERN_INFO
1858 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", 1962 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1859 pci_name(pdev), start, end); 1963 pci_name(pdev), start, end);
1860 if (iommu_identity_mapping) 1964
1861 domain = si_domain; 1965 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1862 else
1863 /* page table init */
1864 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1865 if (!domain) 1966 if (!domain)
1866 return -ENOMEM; 1967 return -ENOMEM;
1867 1968
1868 /* The address might not be aligned */ 1969 ret = iommu_domain_identity_map(domain, start, end);
1869 base = start & PAGE_MASK;
1870 size = end - base;
1871 size = PAGE_ALIGN(size);
1872 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1873 IOVA_PFN(base + size) - 1)) {
1874 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1875 ret = -ENOMEM;
1876 goto error;
1877 }
1878
1879 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1880 size, base, pci_name(pdev));
1881 /*
1882 * RMRR range might have overlap with physical memory range,
1883 * clear it first
1884 */
1885 dma_pte_clear_range(domain, base, base + size);
1886
1887 ret = domain_page_mapping(domain, base, base, size,
1888 DMA_PTE_READ|DMA_PTE_WRITE);
1889 if (ret) 1970 if (ret)
1890 goto error; 1971 goto error;
1891 1972
1892 /* context entry init */ 1973 /* context entry init */
1893 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); 1974 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
1894 if (!ret) 1975 if (ret)
1895 return 0; 1976 goto error;
1896error: 1977
1978 return 0;
1979
1980 error:
1897 domain_exit(domain); 1981 domain_exit(domain);
1898 return ret; 1982 return ret;
1899
1900} 1983}
1901 1984
1902static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, 1985static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
@@ -1908,64 +1991,6 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1908 rmrr->end_address + 1); 1991 rmrr->end_address + 1);
1909} 1992}
1910 1993
1911#ifdef CONFIG_DMAR_GFX_WA
1912struct iommu_prepare_data {
1913 struct pci_dev *pdev;
1914 int ret;
1915};
1916
1917static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1918 unsigned long end_pfn, void *datax)
1919{
1920 struct iommu_prepare_data *data;
1921
1922 data = (struct iommu_prepare_data *)datax;
1923
1924 data->ret = iommu_prepare_identity_map(data->pdev,
1925 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1926 return data->ret;
1927
1928}
1929
1930static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1931{
1932 int nid;
1933 struct iommu_prepare_data data;
1934
1935 data.pdev = pdev;
1936 data.ret = 0;
1937
1938 for_each_online_node(nid) {
1939 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1940 if (data.ret)
1941 return data.ret;
1942 }
1943 return data.ret;
1944}
1945
1946static void __init iommu_prepare_gfx_mapping(void)
1947{
1948 struct pci_dev *pdev = NULL;
1949 int ret;
1950
1951 for_each_pci_dev(pdev) {
1952 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1953 !IS_GFX_DEVICE(pdev))
1954 continue;
1955 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1956 pci_name(pdev));
1957 ret = iommu_prepare_with_active_regions(pdev);
1958 if (ret)
1959 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1960 }
1961}
1962#else /* !CONFIG_DMAR_GFX_WA */
1963static inline void iommu_prepare_gfx_mapping(void)
1964{
1965 return;
1966}
1967#endif
1968
1969#ifdef CONFIG_DMAR_FLOPPY_WA 1994#ifdef CONFIG_DMAR_FLOPPY_WA
1970static inline void iommu_prepare_isa(void) 1995static inline void iommu_prepare_isa(void)
1971{ 1996{
@@ -1976,12 +2001,12 @@ static inline void iommu_prepare_isa(void)
1976 if (!pdev) 2001 if (!pdev)
1977 return; 2002 return;
1978 2003
1979 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n"); 2004 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
1980 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); 2005 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1981 2006
1982 if (ret) 2007 if (ret)
1983 printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, " 2008 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
1984 "floppy might not work\n"); 2009 "floppy might not work\n");
1985 2010
1986} 2011}
1987#else 2012#else
@@ -2009,16 +2034,30 @@ static int __init init_context_pass_through(void)
2009} 2034}
2010 2035
2011static int md_domain_init(struct dmar_domain *domain, int guest_width); 2036static int md_domain_init(struct dmar_domain *domain, int guest_width);
2037
2038static int __init si_domain_work_fn(unsigned long start_pfn,
2039 unsigned long end_pfn, void *datax)
2040{
2041 int *ret = datax;
2042
2043 *ret = iommu_domain_identity_map(si_domain,
2044 (uint64_t)start_pfn << PAGE_SHIFT,
2045 (uint64_t)end_pfn << PAGE_SHIFT);
2046 return *ret;
2047
2048}
2049
2012static int si_domain_init(void) 2050static int si_domain_init(void)
2013{ 2051{
2014 struct dmar_drhd_unit *drhd; 2052 struct dmar_drhd_unit *drhd;
2015 struct intel_iommu *iommu; 2053 struct intel_iommu *iommu;
2016 int ret = 0; 2054 int nid, ret = 0;
2017 2055
2018 si_domain = alloc_domain(); 2056 si_domain = alloc_domain();
2019 if (!si_domain) 2057 if (!si_domain)
2020 return -EFAULT; 2058 return -EFAULT;
2021 2059
2060 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2022 2061
2023 for_each_active_iommu(iommu, drhd) { 2062 for_each_active_iommu(iommu, drhd) {
2024 ret = iommu_attach_domain(si_domain, iommu); 2063 ret = iommu_attach_domain(si_domain, iommu);
@@ -2035,6 +2074,12 @@ static int si_domain_init(void)
2035 2074
2036 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; 2075 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2037 2076
2077 for_each_online_node(nid) {
2078 work_with_active_regions(nid, si_domain_work_fn, &ret);
2079 if (ret)
2080 return ret;
2081 }
2082
2038 return 0; 2083 return 0;
2039} 2084}
2040 2085
@@ -2079,9 +2124,49 @@ static int domain_add_dev_info(struct dmar_domain *domain,
2079 return 0; 2124 return 0;
2080} 2125}
2081 2126
2127static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2128{
2129 if (iommu_identity_mapping == 2)
2130 return IS_GFX_DEVICE(pdev);
2131
2132 /*
2133 * We want to start off with all devices in the 1:1 domain, and
2134 * take them out later if we find they can't access all of memory.
2135 *
2136 * However, we can't do this for PCI devices behind bridges,
2137 * because all PCI devices behind the same bridge will end up
2138 * with the same source-id on their transactions.
2139 *
2140 * Practically speaking, we can't change things around for these
2141 * devices at run-time, because we can't be sure there'll be no
2142 * DMA transactions in flight for any of their siblings.
2143 *
2144 * So PCI devices (unless they're on the root bus) as well as
2145 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2146 * the 1:1 domain, just in _case_ one of their siblings turns out
2147 * not to be able to map all of memory.
2148 */
2149 if (!pdev->is_pcie) {
2150 if (!pci_is_root_bus(pdev->bus))
2151 return 0;
2152 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2153 return 0;
2154 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2155 return 0;
2156
2157 /*
2158 * At boot time, we don't yet know if devices will be 64-bit capable.
2159 * Assume that they will -- if they turn out not to be, then we can
2160 * take them out of the 1:1 domain later.
2161 */
2162 if (!startup)
2163 return pdev->dma_mask > DMA_BIT_MASK(32);
2164
2165 return 1;
2166}
2167
2082static int iommu_prepare_static_identity_mapping(void) 2168static int iommu_prepare_static_identity_mapping(void)
2083{ 2169{
2084 int i;
2085 struct pci_dev *pdev = NULL; 2170 struct pci_dev *pdev = NULL;
2086 int ret; 2171 int ret;
2087 2172
@@ -2089,23 +2174,19 @@ static int iommu_prepare_static_identity_mapping(void)
2089 if (ret) 2174 if (ret)
2090 return -EFAULT; 2175 return -EFAULT;
2091 2176
2092 printk(KERN_INFO "IOMMU: Setting identity map:\n");
2093 for_each_pci_dev(pdev) { 2177 for_each_pci_dev(pdev) {
2094 for (i = 0; i < e820.nr_map; i++) { 2178 if (iommu_should_identity_map(pdev, 1)) {
2095 struct e820entry *ei = &e820.map[i]; 2179 printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
2096 2180 pci_name(pdev));
2097 if (ei->type == E820_RAM) { 2181
2098 ret = iommu_prepare_identity_map(pdev, 2182 ret = domain_context_mapping(si_domain, pdev,
2099 ei->addr, ei->addr + ei->size); 2183 CONTEXT_TT_MULTI_LEVEL);
2100 if (ret) { 2184 if (ret)
2101 printk(KERN_INFO "1:1 mapping to one domain failed.\n"); 2185 return ret;
2102 return -EFAULT; 2186 ret = domain_add_dev_info(si_domain, pdev);
2103 } 2187 if (ret)
2104 } 2188 return ret;
2105 } 2189 }
2106 ret = domain_add_dev_info(si_domain, pdev);
2107 if (ret)
2108 return ret;
2109 } 2190 }
2110 2191
2111 return 0; 2192 return 0;
@@ -2260,6 +2341,10 @@ int __init init_dmars(void)
2260 * identity mapping if iommu_identity_mapping is set. 2341 * identity mapping if iommu_identity_mapping is set.
2261 */ 2342 */
2262 if (!iommu_pass_through) { 2343 if (!iommu_pass_through) {
2344#ifdef CONFIG_DMAR_BROKEN_GFX_WA
2345 if (!iommu_identity_mapping)
2346 iommu_identity_mapping = 2;
2347#endif
2263 if (iommu_identity_mapping) 2348 if (iommu_identity_mapping)
2264 iommu_prepare_static_identity_mapping(); 2349 iommu_prepare_static_identity_mapping();
2265 /* 2350 /*
@@ -2293,8 +2378,6 @@ int __init init_dmars(void)
2293 } 2378 }
2294 } 2379 }
2295 2380
2296 iommu_prepare_gfx_mapping();
2297
2298 iommu_prepare_isa(); 2381 iommu_prepare_isa();
2299 } 2382 }
2300 2383
@@ -2339,50 +2422,32 @@ error:
2339 return ret; 2422 return ret;
2340} 2423}
2341 2424
2342static inline u64 aligned_size(u64 host_addr, size_t size) 2425/* This takes a number of _MM_ pages, not VTD pages */
2343{ 2426static struct iova *intel_alloc_iova(struct device *dev,
2344 u64 addr; 2427 struct dmar_domain *domain,
2345 addr = (host_addr & (~PAGE_MASK)) + size; 2428 unsigned long nrpages, uint64_t dma_mask)
2346 return PAGE_ALIGN(addr);
2347}
2348
2349struct iova *
2350iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2351{
2352 struct iova *piova;
2353
2354 /* Make sure it's in range */
2355 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2356 if (!size || (IOVA_START_ADDR + size > end))
2357 return NULL;
2358
2359 piova = alloc_iova(&domain->iovad,
2360 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2361 return piova;
2362}
2363
2364static struct iova *
2365__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
2366 size_t size, u64 dma_mask)
2367{ 2429{
2368 struct pci_dev *pdev = to_pci_dev(dev); 2430 struct pci_dev *pdev = to_pci_dev(dev);
2369 struct iova *iova = NULL; 2431 struct iova *iova = NULL;
2370 2432
2371 if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac) 2433 /* Restrict dma_mask to the width that the iommu can handle */
2372 iova = iommu_alloc_iova(domain, size, dma_mask); 2434 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2373 else { 2435
2436 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2374 /* 2437 /*
2375 * First try to allocate an io virtual address in 2438 * First try to allocate an io virtual address in
2376 * DMA_BIT_MASK(32) and if that fails then try allocating 2439 * DMA_BIT_MASK(32) and if that fails then try allocating
2377 * from higher range 2440 * from higher range
2378 */ 2441 */
2379 iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32)); 2442 iova = alloc_iova(&domain->iovad, nrpages,
2380 if (!iova) 2443 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2381 iova = iommu_alloc_iova(domain, size, dma_mask); 2444 if (iova)
2382 } 2445 return iova;
2383 2446 }
2384 if (!iova) { 2447 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2385 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); 2448 if (unlikely(!iova)) {
2449 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2450 nrpages, pci_name(pdev));
2386 return NULL; 2451 return NULL;
2387 } 2452 }
2388 2453
@@ -2424,16 +2489,24 @@ static int iommu_dummy(struct pci_dev *pdev)
2424} 2489}
2425 2490
2426/* Check if the pdev needs to go through non-identity map and unmap process.*/ 2491/* Check if the pdev needs to go through non-identity map and unmap process.*/
2427static int iommu_no_mapping(struct pci_dev *pdev) 2492static int iommu_no_mapping(struct device *dev)
2428{ 2493{
2494 struct pci_dev *pdev;
2429 int found; 2495 int found;
2430 2496
2497 if (unlikely(dev->bus != &pci_bus_type))
2498 return 1;
2499
2500 pdev = to_pci_dev(dev);
2501 if (iommu_dummy(pdev))
2502 return 1;
2503
2431 if (!iommu_identity_mapping) 2504 if (!iommu_identity_mapping)
2432 return iommu_dummy(pdev); 2505 return 0;
2433 2506
2434 found = identity_mapping(pdev); 2507 found = identity_mapping(pdev);
2435 if (found) { 2508 if (found) {
2436 if (pdev->dma_mask > DMA_BIT_MASK(32)) 2509 if (iommu_should_identity_map(pdev, 0))
2437 return 1; 2510 return 1;
2438 else { 2511 else {
2439 /* 2512 /*
@@ -2450,9 +2523,12 @@ static int iommu_no_mapping(struct pci_dev *pdev)
2450 * In case of a detached 64 bit DMA device from vm, the device 2523 * In case of a detached 64 bit DMA device from vm, the device
2451 * is put into si_domain for identity mapping. 2524 * is put into si_domain for identity mapping.
2452 */ 2525 */
2453 if (pdev->dma_mask > DMA_BIT_MASK(32)) { 2526 if (iommu_should_identity_map(pdev, 0)) {
2454 int ret; 2527 int ret;
2455 ret = domain_add_dev_info(si_domain, pdev); 2528 ret = domain_add_dev_info(si_domain, pdev);
2529 if (ret)
2530 return 0;
2531 ret = domain_context_mapping(si_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2456 if (!ret) { 2532 if (!ret) {
2457 printk(KERN_INFO "64bit %s uses identity mapping\n", 2533 printk(KERN_INFO "64bit %s uses identity mapping\n",
2458 pci_name(pdev)); 2534 pci_name(pdev));
@@ -2461,7 +2537,7 @@ static int iommu_no_mapping(struct pci_dev *pdev)
2461 } 2537 }
2462 } 2538 }
2463 2539
2464 return iommu_dummy(pdev); 2540 return 0;
2465} 2541}
2466 2542
2467static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, 2543static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
@@ -2474,10 +2550,11 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2474 int prot = 0; 2550 int prot = 0;
2475 int ret; 2551 int ret;
2476 struct intel_iommu *iommu; 2552 struct intel_iommu *iommu;
2553 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2477 2554
2478 BUG_ON(dir == DMA_NONE); 2555 BUG_ON(dir == DMA_NONE);
2479 2556
2480 if (iommu_no_mapping(pdev)) 2557 if (iommu_no_mapping(hwdev))
2481 return paddr; 2558 return paddr;
2482 2559
2483 domain = get_valid_domain_for_dev(pdev); 2560 domain = get_valid_domain_for_dev(pdev);
@@ -2485,14 +2562,13 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2485 return 0; 2562 return 0;
2486 2563
2487 iommu = domain_get_iommu(domain); 2564 iommu = domain_get_iommu(domain);
2488 size = aligned_size((u64)paddr, size); 2565 size = aligned_nrpages(paddr, size);
2489 2566
2490 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2567 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2568 pdev->dma_mask);
2491 if (!iova) 2569 if (!iova)
2492 goto error; 2570 goto error;
2493 2571
2494 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2495
2496 /* 2572 /*
2497 * Check if DMAR supports zero-length reads on write only 2573 * Check if DMAR supports zero-length reads on write only
2498 * mappings.. 2574 * mappings..
@@ -2508,20 +2584,20 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2508 * might have two guest_addr mapping to the same host paddr, but this 2584 * might have two guest_addr mapping to the same host paddr, but this
2509 * is not a big problem 2585 * is not a big problem
2510 */ 2586 */
2511 ret = domain_page_mapping(domain, start_paddr, 2587 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2512 ((u64)paddr) & PHYSICAL_PAGE_MASK, 2588 mm_to_dma_pfn(paddr_pfn), size, prot);
2513 size, prot);
2514 if (ret) 2589 if (ret)
2515 goto error; 2590 goto error;
2516 2591
2517 /* it's a non-present to present mapping. Only flush if caching mode */ 2592 /* it's a non-present to present mapping. Only flush if caching mode */
2518 if (cap_caching_mode(iommu->cap)) 2593 if (cap_caching_mode(iommu->cap))
2519 iommu_flush_iotlb_psi(iommu, 0, start_paddr, 2594 iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size);
2520 size >> VTD_PAGE_SHIFT);
2521 else 2595 else
2522 iommu_flush_write_buffer(iommu); 2596 iommu_flush_write_buffer(iommu);
2523 2597
2524 return start_paddr + ((u64)paddr & (~PAGE_MASK)); 2598 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2599 start_paddr += paddr & ~PAGE_MASK;
2600 return start_paddr;
2525 2601
2526error: 2602error:
2527 if (iova) 2603 if (iova)
@@ -2614,11 +2690,11 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2614{ 2690{
2615 struct pci_dev *pdev = to_pci_dev(dev); 2691 struct pci_dev *pdev = to_pci_dev(dev);
2616 struct dmar_domain *domain; 2692 struct dmar_domain *domain;
2617 unsigned long start_addr; 2693 unsigned long start_pfn, last_pfn;
2618 struct iova *iova; 2694 struct iova *iova;
2619 struct intel_iommu *iommu; 2695 struct intel_iommu *iommu;
2620 2696
2621 if (iommu_no_mapping(pdev)) 2697 if (iommu_no_mapping(dev))
2622 return; 2698 return;
2623 2699
2624 domain = find_domain(pdev); 2700 domain = find_domain(pdev);
@@ -2627,22 +2703,25 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2627 iommu = domain_get_iommu(domain); 2703 iommu = domain_get_iommu(domain);
2628 2704
2629 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); 2705 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2630 if (!iova) 2706 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2707 (unsigned long long)dev_addr))
2631 return; 2708 return;
2632 2709
2633 start_addr = iova->pfn_lo << PAGE_SHIFT; 2710 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2634 size = aligned_size((u64)dev_addr, size); 2711 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2635 2712
2636 pr_debug("Device %s unmapping: %zx@%llx\n", 2713 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2637 pci_name(pdev), size, (unsigned long long)start_addr); 2714 pci_name(pdev), start_pfn, last_pfn);
2638 2715
2639 /* clear the whole page */ 2716 /* clear the whole page */
2640 dma_pte_clear_range(domain, start_addr, start_addr + size); 2717 dma_pte_clear_range(domain, start_pfn, last_pfn);
2718
2641 /* free page tables */ 2719 /* free page tables */
2642 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2720 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2721
2643 if (intel_iommu_strict) { 2722 if (intel_iommu_strict) {
2644 iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2723 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2645 size >> VTD_PAGE_SHIFT); 2724 last_pfn - start_pfn + 1);
2646 /* free iova */ 2725 /* free iova */
2647 __free_iova(&domain->iovad, iova); 2726 __free_iova(&domain->iovad, iova);
2648 } else { 2727 } else {
@@ -2700,17 +2779,13 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2700 int nelems, enum dma_data_direction dir, 2779 int nelems, enum dma_data_direction dir,
2701 struct dma_attrs *attrs) 2780 struct dma_attrs *attrs)
2702{ 2781{
2703 int i;
2704 struct pci_dev *pdev = to_pci_dev(hwdev); 2782 struct pci_dev *pdev = to_pci_dev(hwdev);
2705 struct dmar_domain *domain; 2783 struct dmar_domain *domain;
2706 unsigned long start_addr; 2784 unsigned long start_pfn, last_pfn;
2707 struct iova *iova; 2785 struct iova *iova;
2708 size_t size = 0;
2709 phys_addr_t addr;
2710 struct scatterlist *sg;
2711 struct intel_iommu *iommu; 2786 struct intel_iommu *iommu;
2712 2787
2713 if (iommu_no_mapping(pdev)) 2788 if (iommu_no_mapping(hwdev))
2714 return; 2789 return;
2715 2790
2716 domain = find_domain(pdev); 2791 domain = find_domain(pdev);
@@ -2719,22 +2794,21 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2719 iommu = domain_get_iommu(domain); 2794 iommu = domain_get_iommu(domain);
2720 2795
2721 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); 2796 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2722 if (!iova) 2797 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2798 (unsigned long long)sglist[0].dma_address))
2723 return; 2799 return;
2724 for_each_sg(sglist, sg, nelems, i) {
2725 addr = page_to_phys(sg_page(sg)) + sg->offset;
2726 size += aligned_size((u64)addr, sg->length);
2727 }
2728 2800
2729 start_addr = iova->pfn_lo << PAGE_SHIFT; 2801 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2802 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2730 2803
2731 /* clear the whole page */ 2804 /* clear the whole page */
2732 dma_pte_clear_range(domain, start_addr, start_addr + size); 2805 dma_pte_clear_range(domain, start_pfn, last_pfn);
2806
2733 /* free page tables */ 2807 /* free page tables */
2734 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2808 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2735 2809
2736 iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2810 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2737 size >> VTD_PAGE_SHIFT); 2811 (last_pfn - start_pfn + 1));
2738 2812
2739 /* free iova */ 2813 /* free iova */
2740 __free_iova(&domain->iovad, iova); 2814 __free_iova(&domain->iovad, iova);
@@ -2757,21 +2831,20 @@ static int intel_nontranslate_map_sg(struct device *hddev,
2757static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, 2831static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2758 enum dma_data_direction dir, struct dma_attrs *attrs) 2832 enum dma_data_direction dir, struct dma_attrs *attrs)
2759{ 2833{
2760 phys_addr_t addr;
2761 int i; 2834 int i;
2762 struct pci_dev *pdev = to_pci_dev(hwdev); 2835 struct pci_dev *pdev = to_pci_dev(hwdev);
2763 struct dmar_domain *domain; 2836 struct dmar_domain *domain;
2764 size_t size = 0; 2837 size_t size = 0;
2765 int prot = 0; 2838 int prot = 0;
2766 size_t offset = 0; 2839 size_t offset_pfn = 0;
2767 struct iova *iova = NULL; 2840 struct iova *iova = NULL;
2768 int ret; 2841 int ret;
2769 struct scatterlist *sg; 2842 struct scatterlist *sg;
2770 unsigned long start_addr; 2843 unsigned long start_vpfn;
2771 struct intel_iommu *iommu; 2844 struct intel_iommu *iommu;
2772 2845
2773 BUG_ON(dir == DMA_NONE); 2846 BUG_ON(dir == DMA_NONE);
2774 if (iommu_no_mapping(pdev)) 2847 if (iommu_no_mapping(hwdev))
2775 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); 2848 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2776 2849
2777 domain = get_valid_domain_for_dev(pdev); 2850 domain = get_valid_domain_for_dev(pdev);
@@ -2780,12 +2853,11 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2780 2853
2781 iommu = domain_get_iommu(domain); 2854 iommu = domain_get_iommu(domain);
2782 2855
2783 for_each_sg(sglist, sg, nelems, i) { 2856 for_each_sg(sglist, sg, nelems, i)
2784 addr = page_to_phys(sg_page(sg)) + sg->offset; 2857 size += aligned_nrpages(sg->offset, sg->length);
2785 size += aligned_size((u64)addr, sg->length);
2786 }
2787 2858
2788 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2859 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2860 pdev->dma_mask);
2789 if (!iova) { 2861 if (!iova) {
2790 sglist->dma_length = 0; 2862 sglist->dma_length = 0;
2791 return 0; 2863 return 0;
@@ -2801,35 +2873,24 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2801 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2873 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2802 prot |= DMA_PTE_WRITE; 2874 prot |= DMA_PTE_WRITE;
2803 2875
2804 start_addr = iova->pfn_lo << PAGE_SHIFT; 2876 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
2805 offset = 0; 2877
2806 for_each_sg(sglist, sg, nelems, i) { 2878 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
2807 addr = page_to_phys(sg_page(sg)) + sg->offset; 2879 if (unlikely(ret)) {
2808 size = aligned_size((u64)addr, sg->length); 2880 /* clear the page */
2809 ret = domain_page_mapping(domain, start_addr + offset, 2881 dma_pte_clear_range(domain, start_vpfn,
2810 ((u64)addr) & PHYSICAL_PAGE_MASK, 2882 start_vpfn + size - 1);
2811 size, prot); 2883 /* free page tables */
2812 if (ret) { 2884 dma_pte_free_pagetable(domain, start_vpfn,
2813 /* clear the page */ 2885 start_vpfn + size - 1);
2814 dma_pte_clear_range(domain, start_addr, 2886 /* free iova */
2815 start_addr + offset); 2887 __free_iova(&domain->iovad, iova);
2816 /* free page tables */ 2888 return 0;
2817 dma_pte_free_pagetable(domain, start_addr,
2818 start_addr + offset);
2819 /* free iova */
2820 __free_iova(&domain->iovad, iova);
2821 return 0;
2822 }
2823 sg->dma_address = start_addr + offset +
2824 ((u64)addr & (~PAGE_MASK));
2825 sg->dma_length = sg->length;
2826 offset += size;
2827 } 2889 }
2828 2890
2829 /* it's a non-present to present mapping. Only flush if caching mode */ 2891 /* it's a non-present to present mapping. Only flush if caching mode */
2830 if (cap_caching_mode(iommu->cap)) 2892 if (cap_caching_mode(iommu->cap))
2831 iommu_flush_iotlb_psi(iommu, 0, start_addr, 2893 iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn);
2832 offset >> VTD_PAGE_SHIFT);
2833 else 2894 else
2834 iommu_flush_write_buffer(iommu); 2895 iommu_flush_write_buffer(iommu);
2835 2896
@@ -3334,7 +3395,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
3334 int adjust_width; 3395 int adjust_width;
3335 3396
3336 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 3397 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3337 spin_lock_init(&domain->mapping_lock);
3338 spin_lock_init(&domain->iommu_lock); 3398 spin_lock_init(&domain->iommu_lock);
3339 3399
3340 domain_reserve_special_ranges(domain); 3400 domain_reserve_special_ranges(domain);
@@ -3348,6 +3408,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
3348 3408
3349 domain->iommu_count = 0; 3409 domain->iommu_count = 0;
3350 domain->iommu_coherency = 0; 3410 domain->iommu_coherency = 0;
3411 domain->iommu_snooping = 0;
3351 domain->max_addr = 0; 3412 domain->max_addr = 0;
3352 3413
3353 /* always allocate the top pgd */ 3414 /* always allocate the top pgd */
@@ -3388,8 +3449,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain)
3388 3449
3389static void vm_domain_exit(struct dmar_domain *domain) 3450static void vm_domain_exit(struct dmar_domain *domain)
3390{ 3451{
3391 u64 end;
3392
3393 /* Domain 0 is reserved, so dont process it */ 3452 /* Domain 0 is reserved, so dont process it */
3394 if (!domain) 3453 if (!domain)
3395 return; 3454 return;
@@ -3397,14 +3456,12 @@ static void vm_domain_exit(struct dmar_domain *domain)
3397 vm_domain_remove_all_dev_info(domain); 3456 vm_domain_remove_all_dev_info(domain);
3398 /* destroy iovas */ 3457 /* destroy iovas */
3399 put_iova_domain(&domain->iovad); 3458 put_iova_domain(&domain->iovad);
3400 end = DOMAIN_MAX_ADDR(domain->gaw);
3401 end = end & (~VTD_PAGE_MASK);
3402 3459
3403 /* clear ptes */ 3460 /* clear ptes */
3404 dma_pte_clear_range(domain, 0, end); 3461 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3405 3462
3406 /* free page tables */ 3463 /* free page tables */
3407 dma_pte_free_pagetable(domain, 0, end); 3464 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3408 3465
3409 iommu_free_vm_domain(domain); 3466 iommu_free_vm_domain(domain);
3410 free_domain_mem(domain); 3467 free_domain_mem(domain);
@@ -3513,7 +3570,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3513 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) 3570 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3514 prot |= DMA_PTE_SNP; 3571 prot |= DMA_PTE_SNP;
3515 3572
3516 max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); 3573 max_addr = iova + size;
3517 if (dmar_domain->max_addr < max_addr) { 3574 if (dmar_domain->max_addr < max_addr) {
3518 int min_agaw; 3575 int min_agaw;
3519 u64 end; 3576 u64 end;
@@ -3531,8 +3588,11 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3531 } 3588 }
3532 dmar_domain->max_addr = max_addr; 3589 dmar_domain->max_addr = max_addr;
3533 } 3590 }
3534 3591 /* Round up size to next multiple of PAGE_SIZE, if it and
3535 ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); 3592 the low bits of hpa would take us onto the next page */
3593 size = aligned_nrpages(hpa, size);
3594 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3595 hpa >> VTD_PAGE_SHIFT, size, prot);
3536 return ret; 3596 return ret;
3537} 3597}
3538 3598
@@ -3540,15 +3600,15 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain,
3540 unsigned long iova, size_t size) 3600 unsigned long iova, size_t size)
3541{ 3601{
3542 struct dmar_domain *dmar_domain = domain->priv; 3602 struct dmar_domain *dmar_domain = domain->priv;
3543 dma_addr_t base;
3544 3603
3545 /* The address might not be aligned */ 3604 if (!size)
3546 base = iova & VTD_PAGE_MASK; 3605 return;
3547 size = VTD_PAGE_ALIGN(size); 3606
3548 dma_pte_clear_range(dmar_domain, base, base + size); 3607 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3608 (iova + size - 1) >> VTD_PAGE_SHIFT);
3549 3609
3550 if (dmar_domain->max_addr == base + size) 3610 if (dmar_domain->max_addr == iova + size)
3551 dmar_domain->max_addr = base; 3611 dmar_domain->max_addr = iova;
3552} 3612}
3553 3613
3554static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 3614static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
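
The unmap path above, like the rest of the patch, uses an inclusive [start_pfn, last_pfn] convention. A quick arithmetic check of the conversion from a byte range, assuming 4KiB VT-d pages and a made-up iova/size:

#include <stdio.h>

#define VTD_PAGE_SHIFT 12

int main(void)
{
        unsigned long long iova = 0x3000, size = 0x2001;        /* hypothetical range */
        unsigned long start_pfn = iova >> VTD_PAGE_SHIFT;
        unsigned long last_pfn  = (iova + size - 1) >> VTD_PAGE_SHIFT;

        /* Bytes 0x3000..0x5000 inclusive touch pfns 3, 4 and 5. */
        printf("pfns %lu..%lu (%lu pages)\n",
               start_pfn, last_pfn, last_pfn - start_pfn + 1);
        return 0;
}
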
@@ -3558,7 +3618,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3558 struct dma_pte *pte; 3618 struct dma_pte *pte;
3559 u64 phys = 0; 3619 u64 phys = 0;
3560 3620
3561 pte = addr_to_dma_pte(dmar_domain, iova); 3621 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
3562 if (pte) 3622 if (pte)
3563 phys = dma_pte_addr(pte); 3623 phys = dma_pte_addr(pte);
3564 3624