Diffstat (limited to 'drivers/pci/intel-iommu.c')
-rw-r--r--  drivers/pci/intel-iommu.c | 1375
1 file changed, 854 insertions(+), 521 deletions(-)
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index cd389162735f..53075424a434 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -53,15 +53,35 @@
53 53
54#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 54#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
55 55
56#define MAX_AGAW_WIDTH 64
57
56#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 58#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
59#define DOMAIN_MAX_PFN(gaw) ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
57 60
58#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 61#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
59#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) 62#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
60#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) 63#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
61 64
62#ifndef PHYSICAL_PAGE_MASK 65
63#define PHYSICAL_PAGE_MASK PAGE_MASK 66/* VT-d pages must never be _larger_ than MM pages. Otherwise things
64#endif 67 are never going to work. */
68static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
69{
70 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
71}
72
73static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
74{
75 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
76}
77static inline unsigned long page_to_dma_pfn(struct page *pg)
78{
79 return mm_to_dma_pfn(page_to_pfn(pg));
80}
81static inline unsigned long virt_to_dma_pfn(void *p)
82{
83 return page_to_dma_pfn(virt_to_page(p));
84}
65 85
66/* global iommu list, set NULL for ignored DMAR units */ 86/* global iommu list, set NULL for ignored DMAR units */
67static struct intel_iommu **g_iommus; 87static struct intel_iommu **g_iommus;
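
The dma_to_mm_pfn()/mm_to_dma_pfn() helpers added above convert between kernel MM page frame numbers and VT-d (4KiB) page frame numbers, which only works because VT-d pages are never larger than MM pages. A minimal standalone sketch of the arithmetic, assuming a hypothetical PAGE_SHIFT of 16 (64KiB MM pages) against the 4KiB VTD_PAGE_SHIFT of 12; on typical x86 configs both shifts are 12 and the conversions are no-ops:

#include <stdio.h>

#define MOCK_PAGE_SHIFT     16  /* assumed MM page shift for illustration */
#define MOCK_VTD_PAGE_SHIFT 12  /* VT-d always uses 4KiB pages */

static unsigned long mock_dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (MOCK_PAGE_SHIFT - MOCK_VTD_PAGE_SHIFT);
}

static unsigned long mock_mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (MOCK_PAGE_SHIFT - MOCK_VTD_PAGE_SHIFT);
}

int main(void)
{
	unsigned long mm_pfn = 3;

	/* MM pfn 3 covers DMA pfns 48..63; converting back recovers 3 */
	printf("mm %lu -> dma %lu -> mm %lu\n", mm_pfn,
	       mock_mm_to_dma_pfn(mm_pfn),
	       mock_dma_to_mm_pfn(mock_mm_to_dma_pfn(mm_pfn)));
	return 0;
}
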
@@ -131,8 +151,6 @@ static inline void context_set_fault_enable(struct context_entry *context)
131 context->lo &= (((u64)-1) << 2) | 1; 151 context->lo &= (((u64)-1) << 2) | 1;
132} 152}
133 153
134#define CONTEXT_TT_MULTI_LEVEL 0
135
136static inline void context_set_translation_type(struct context_entry *context, 154static inline void context_set_translation_type(struct context_entry *context,
137 unsigned long value) 155 unsigned long value)
138{ 156{
@@ -204,12 +222,17 @@ static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
204 222
205static inline u64 dma_pte_addr(struct dma_pte *pte) 223static inline u64 dma_pte_addr(struct dma_pte *pte)
206{ 224{
207 return (pte->val & VTD_PAGE_MASK); 225#ifdef CONFIG_64BIT
226 return pte->val & VTD_PAGE_MASK;
227#else
228 /* Must have a full atomic 64-bit read */
229 return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
230#endif
208} 231}
209 232
210static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr) 233static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
211{ 234{
212 pte->val |= (addr & VTD_PAGE_MASK); 235 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
213} 236}
214 237
215static inline bool dma_pte_present(struct dma_pte *pte) 238static inline bool dma_pte_present(struct dma_pte *pte)
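
On 32-bit builds a plain load of the 64-bit pte->val can tear, so dma_pte_addr() above reads it with __cmpxchg64(pte, 0ULL, 0ULL): comparing against 0 and "writing" 0 leaves the entry untouched but returns its current value in one atomic operation. A standalone sketch of the same idea, using the GCC/Clang __sync builtin as a stand-in (an assumption here; the kernel uses its own __cmpxchg64 helper):

#include <stdint.h>
#include <stdio.h>

static uint64_t atomic_read64(uint64_t *p)
{
	/* If *p == 0 it is "replaced" by 0, otherwise nothing is written;
	 * either way the previous value comes back atomically. */
	return __sync_val_compare_and_swap(p, (uint64_t)0, (uint64_t)0);
}

int main(void)
{
	uint64_t pte = 0x12345678000ULL | 0x3;	/* address bits | R/W bits */

	printf("pte = 0x%llx\n", (unsigned long long)atomic_read64(&pte));
	return 0;
}
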
@@ -217,6 +240,19 @@ static inline bool dma_pte_present(struct dma_pte *pte)
217 return (pte->val & 3) != 0; 240 return (pte->val & 3) != 0;
218} 241}
219 242
243static inline int first_pte_in_page(struct dma_pte *pte)
244{
245 return !((unsigned long)pte & ~VTD_PAGE_MASK);
246}
247
248/*
249 * This domain is a static identity mapping domain.
250 * 1. This domain creates a static 1:1 mapping to all usable memory.
251 * 2. It maps to each iommu if successful.
252 * 3. Each iommu maps to this domain if successful.
253 */
254struct dmar_domain *si_domain;
255
220/* devices under the same p2p bridge are owned in one domain */ 256/* devices under the same p2p bridge are owned in one domain */
221#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0) 257#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
222 258
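
first_pte_in_page() above is what lets the later loops batch their cache flushes: PTEs are written until the pointer wraps onto the next page-table page, and domain_flush_cache() is then called once for the whole run instead of once per entry. A standalone sketch of the boundary test, assuming 4KiB page-table pages holding 512 eight-byte entries:

#include <stdint.h>
#include <stdio.h>

#define MOCK_VTD_PAGE_SIZE 4096UL
#define MOCK_VTD_PAGE_MASK (~(MOCK_VTD_PAGE_SIZE - 1))

struct mock_pte { uint64_t val; };

static int mock_first_pte_in_page(struct mock_pte *pte)
{
	return !((uintptr_t)pte & ~MOCK_VTD_PAGE_MASK);
}

int main(void)
{
	/* A page-table page starts on a 4KiB boundary; entry 512 is the
	 * first entry of the *next* page, where a batched flush occurs. */
	struct mock_pte *base = (struct mock_pte *)(uintptr_t)0x1000;

	printf("entry 0: %d  entry 1: %d  entry 511: %d  entry 512: %d\n",
	       mock_first_pte_in_page(base), mock_first_pte_in_page(base + 1),
	       mock_first_pte_in_page(base + 511),
	       mock_first_pte_in_page(base + 512));	/* prints 1 0 0 1 */
	return 0;
}
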
@@ -225,6 +261,9 @@ static inline bool dma_pte_present(struct dma_pte *pte)
225 */ 261 */
226#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1) 262#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
227 263
264/* si_domain contains multiple devices */
265#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
266
228struct dmar_domain { 267struct dmar_domain {
229 int id; /* domain id */ 268 int id; /* domain id */
230 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/ 269 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
@@ -233,7 +272,6 @@ struct dmar_domain {
233 struct iova_domain iovad; /* iova's that belong to this domain */ 272 struct iova_domain iovad; /* iova's that belong to this domain */
234 273
235 struct dma_pte *pgd; /* virtual address */ 274 struct dma_pte *pgd; /* virtual address */
236 spinlock_t mapping_lock; /* page table lock */
237 int gaw; /* max guest address width */ 275 int gaw; /* max guest address width */
238 276
239 /* adjusted guest address width, 0 is level 2 30-bit */ 277 /* adjusted guest address width, 0 is level 2 30-bit */
@@ -256,6 +294,7 @@ struct device_domain_info {
256 u8 bus; /* PCI bus number */ 294 u8 bus; /* PCI bus number */
257 u8 devfn; /* PCI devfn number */ 295 u8 devfn; /* PCI devfn number */
258 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ 296 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
297 struct intel_iommu *iommu; /* IOMMU used by this device */
259 struct dmar_domain *domain; /* pointer to domain */ 298 struct dmar_domain *domain; /* pointer to domain */
260}; 299};
261 300
@@ -401,17 +440,13 @@ void free_iova_mem(struct iova *iova)
401 440
402static inline int width_to_agaw(int width); 441static inline int width_to_agaw(int width);
403 442
404/* calculate agaw for each iommu. 443static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
405 * "SAGAW" may be different across iommus, use a default agaw, and
406 * get a supported less agaw for iommus that don't support the default agaw.
407 */
408int iommu_calculate_agaw(struct intel_iommu *iommu)
409{ 444{
410 unsigned long sagaw; 445 unsigned long sagaw;
411 int agaw = -1; 446 int agaw = -1;
412 447
413 sagaw = cap_sagaw(iommu->cap); 448 sagaw = cap_sagaw(iommu->cap);
414 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); 449 for (agaw = width_to_agaw(max_gaw);
415 agaw >= 0; agaw--) { 450 agaw >= 0; agaw--) {
416 if (test_bit(agaw, &sagaw)) 451 if (test_bit(agaw, &sagaw))
417 break; 452 break;
@@ -420,12 +455,32 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
420 return agaw; 455 return agaw;
421} 456}
422 457
423/* in native case, each domain is related to only one iommu */ 458/*
459 * Calculate max SAGAW for each iommu.
460 */
461int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
462{
463 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
464}
465
466/*
467 * Calculate agaw for each iommu.
468 * "SAGAW" may be different across iommus; use a default agaw, and
469 * fall back to a lesser supported agaw for iommus that don't support the default agaw.
470 */
471int iommu_calculate_agaw(struct intel_iommu *iommu)
472{
473 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
474}
475
476/* This function only returns a single iommu in a domain */
424static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) 477static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
425{ 478{
426 int iommu_id; 479 int iommu_id;
427 480
481 /* si_domain and vm domain should not get here. */
428 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE); 482 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
483 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
429 484
430 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); 485 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
431 if (iommu_id < 0 || iommu_id >= g_num_of_iommus) 486 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
@@ -620,80 +675,78 @@ static inline int width_to_agaw(int width)
620 675
621static inline unsigned int level_to_offset_bits(int level) 676static inline unsigned int level_to_offset_bits(int level)
622{ 677{
623 return (12 + (level - 1) * LEVEL_STRIDE); 678 return (level - 1) * LEVEL_STRIDE;
624} 679}
625 680
626static inline int address_level_offset(u64 addr, int level) 681static inline int pfn_level_offset(unsigned long pfn, int level)
627{ 682{
628 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK); 683 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
629} 684}
630 685
631static inline u64 level_mask(int level) 686static inline unsigned long level_mask(int level)
632{ 687{
633 return ((u64)-1 << level_to_offset_bits(level)); 688 return -1UL << level_to_offset_bits(level);
634} 689}
635 690
636static inline u64 level_size(int level) 691static inline unsigned long level_size(int level)
637{ 692{
638 return ((u64)1 << level_to_offset_bits(level)); 693 return 1UL << level_to_offset_bits(level);
639} 694}
640 695
641static inline u64 align_to_level(u64 addr, int level) 696static inline unsigned long align_to_level(unsigned long pfn, int level)
642{ 697{
643 return ((addr + level_size(level) - 1) & level_mask(level)); 698 return (pfn + level_size(level) - 1) & level_mask(level);
644} 699}
645 700
646static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) 701static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
702 unsigned long pfn)
647{ 703{
648 int addr_width = agaw_to_width(domain->agaw); 704 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
649 struct dma_pte *parent, *pte = NULL; 705 struct dma_pte *parent, *pte = NULL;
650 int level = agaw_to_level(domain->agaw); 706 int level = agaw_to_level(domain->agaw);
651 int offset; 707 int offset;
652 unsigned long flags;
653 708
654 BUG_ON(!domain->pgd); 709 BUG_ON(!domain->pgd);
655 710 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
656 addr &= (((u64)1) << addr_width) - 1;
657 parent = domain->pgd; 711 parent = domain->pgd;
658 712
659 spin_lock_irqsave(&domain->mapping_lock, flags);
660 while (level > 0) { 713 while (level > 0) {
661 void *tmp_page; 714 void *tmp_page;
662 715
663 offset = address_level_offset(addr, level); 716 offset = pfn_level_offset(pfn, level);
664 pte = &parent[offset]; 717 pte = &parent[offset];
665 if (level == 1) 718 if (level == 1)
666 break; 719 break;
667 720
668 if (!dma_pte_present(pte)) { 721 if (!dma_pte_present(pte)) {
722 uint64_t pteval;
723
669 tmp_page = alloc_pgtable_page(); 724 tmp_page = alloc_pgtable_page();
670 725
671 if (!tmp_page) { 726 if (!tmp_page)
672 spin_unlock_irqrestore(&domain->mapping_lock,
673 flags);
674 return NULL; 727 return NULL;
728
729 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
730 pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
731 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
732 /* Someone else set it while we were thinking; use theirs. */
733 free_pgtable_page(tmp_page);
734 } else {
735 dma_pte_addr(pte);
736 domain_flush_cache(domain, pte, sizeof(*pte));
675 } 737 }
676 domain_flush_cache(domain, tmp_page, PAGE_SIZE);
677 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
678 /*
679 * high level table always sets r/w, last level page
680 * table control read/write
681 */
682 dma_set_pte_readable(pte);
683 dma_set_pte_writable(pte);
684 domain_flush_cache(domain, pte, sizeof(*pte));
685 } 738 }
686 parent = phys_to_virt(dma_pte_addr(pte)); 739 parent = phys_to_virt(dma_pte_addr(pte));
687 level--; 740 level--;
688 } 741 }
689 742
690 spin_unlock_irqrestore(&domain->mapping_lock, flags);
691 return pte; 743 return pte;
692} 744}
693 745
694/* return address's pte at specific level */ 746/* return address's pte at specific level */
695static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, 747static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
696 int level) 748 unsigned long pfn,
749 int level)
697{ 750{
698 struct dma_pte *parent, *pte = NULL; 751 struct dma_pte *parent, *pte = NULL;
699 int total = agaw_to_level(domain->agaw); 752 int total = agaw_to_level(domain->agaw);
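
Note that the reworked level_to_offset_bits() above no longer adds the 12-bit page offset, because the walk now operates on pfns rather than addresses. A standalone sketch of the index arithmetic used by pfn_to_dma_pte() and dma_pfn_level_pte(), assuming the usual LEVEL_STRIDE of 9 (512 entries per table page) and a 4-level table for a 48-bit address width:

#include <stdio.h>

#define MOCK_LEVEL_STRIDE 9
#define MOCK_LEVEL_MASK   ((1 << MOCK_LEVEL_STRIDE) - 1)

static unsigned int mock_level_to_offset_bits(int level)
{
	return (level - 1) * MOCK_LEVEL_STRIDE;
}

static int mock_pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> mock_level_to_offset_bits(level)) & MOCK_LEVEL_MASK;
}

int main(void)
{
	unsigned long pfn = 0x12345;	/* DMA pfn being looked up */
	int level;

	for (level = 4; level >= 1; level--)
		printf("level %d -> table index %d\n",
		       level, mock_pfn_level_offset(pfn, level));
	return 0;
}
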
@@ -701,7 +754,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
701 754
702 parent = domain->pgd; 755 parent = domain->pgd;
703 while (level <= total) { 756 while (level <= total) {
704 offset = address_level_offset(addr, total); 757 offset = pfn_level_offset(pfn, total);
705 pte = &parent[offset]; 758 pte = &parent[offset];
706 if (level == total) 759 if (level == total)
707 return pte; 760 return pte;
@@ -714,74 +767,82 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
714 return NULL; 767 return NULL;
715} 768}
716 769
717/* clear one page's page table */
718static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
719{
720 struct dma_pte *pte = NULL;
721
722 /* get last level pte */
723 pte = dma_addr_level_pte(domain, addr, 1);
724
725 if (pte) {
726 dma_clear_pte(pte);
727 domain_flush_cache(domain, pte, sizeof(*pte));
728 }
729}
730
731/* clear last level pte, a tlb flush should be followed */ 770/* clear last level pte, a tlb flush should be followed */
732static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) 771static void dma_pte_clear_range(struct dmar_domain *domain,
772 unsigned long start_pfn,
773 unsigned long last_pfn)
733{ 774{
734 int addr_width = agaw_to_width(domain->agaw); 775 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
735 int npages; 776 struct dma_pte *first_pte, *pte;
736 777
737 start &= (((u64)1) << addr_width) - 1; 778 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
738 end &= (((u64)1) << addr_width) - 1; 779 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
739 /* in case it's partial page */
740 start &= PAGE_MASK;
741 end = PAGE_ALIGN(end);
742 npages = (end - start) / VTD_PAGE_SIZE;
743 780
744 /* we don't need lock here, nobody else touches the iova range */ 781 /* we don't need lock here; nobody else touches the iova range */
745 while (npages--) { 782 while (start_pfn <= last_pfn) {
746 dma_pte_clear_one(domain, start); 783 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
747 start += VTD_PAGE_SIZE; 784 if (!pte) {
785 start_pfn = align_to_level(start_pfn + 1, 2);
786 continue;
787 }
788 do {
789 dma_clear_pte(pte);
790 start_pfn++;
791 pte++;
792 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
793
794 domain_flush_cache(domain, first_pte,
795 (void *)pte - (void *)first_pte);
748 } 796 }
749} 797}
750 798
751/* free page table pages. last level pte should already be cleared */ 799/* free page table pages. last level pte should already be cleared */
752static void dma_pte_free_pagetable(struct dmar_domain *domain, 800static void dma_pte_free_pagetable(struct dmar_domain *domain,
753 u64 start, u64 end) 801 unsigned long start_pfn,
802 unsigned long last_pfn)
754{ 803{
755 int addr_width = agaw_to_width(domain->agaw); 804 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
756 struct dma_pte *pte; 805 struct dma_pte *first_pte, *pte;
757 int total = agaw_to_level(domain->agaw); 806 int total = agaw_to_level(domain->agaw);
758 int level; 807 int level;
759 u64 tmp; 808 unsigned long tmp;
760 809
761 start &= (((u64)1) << addr_width) - 1; 810 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
762 end &= (((u64)1) << addr_width) - 1; 811 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
763 812
764 /* we don't need lock here, nobody else touches the iova range */ 813 /* We don't need lock here; nobody else touches the iova range */
765 level = 2; 814 level = 2;
766 while (level <= total) { 815 while (level <= total) {
767 tmp = align_to_level(start, level); 816 tmp = align_to_level(start_pfn, level);
768 if (tmp >= end || (tmp + level_size(level) > end)) 817
818 /* If we can't even clear one PTE at this level, we're done */
819 if (tmp + level_size(level) - 1 > last_pfn)
769 return; 820 return;
770 821
771 while (tmp < end) { 822 while (tmp + level_size(level) - 1 <= last_pfn) {
772 pte = dma_addr_level_pte(domain, tmp, level); 823 first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
773 if (pte) { 824 if (!pte) {
774 free_pgtable_page( 825 tmp = align_to_level(tmp + 1, level + 1);
775 phys_to_virt(dma_pte_addr(pte))); 826 continue;
776 dma_clear_pte(pte);
777 domain_flush_cache(domain, pte, sizeof(*pte));
778 } 827 }
779 tmp += level_size(level); 828 do {
829 if (dma_pte_present(pte)) {
830 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
831 dma_clear_pte(pte);
832 }
833 pte++;
834 tmp += level_size(level);
835 } while (!first_pte_in_page(pte) &&
836 tmp + level_size(level) - 1 <= last_pfn);
837
838 domain_flush_cache(domain, first_pte,
839 (void *)pte - (void *)first_pte);
840
780 } 841 }
781 level++; 842 level++;
782 } 843 }
783 /* free pgd */ 844 /* free pgd */
784 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) { 845 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
785 free_pgtable_page(domain->pgd); 846 free_pgtable_page(domain->pgd);
786 domain->pgd = NULL; 847 domain->pgd = NULL;
787 } 848 }
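
When dma_pte_clear_range() or dma_pte_free_pagetable() above finds no PTE at the current position, it does not step one pfn at a time; align_to_level() jumps the scan forward to the next boundary covered by the missing table. A small standalone sketch, assuming level 2 covers 512 VT-d pages:

#include <stdio.h>

#define MOCK_LEVEL_STRIDE 9

static unsigned long mock_level_size(int level)
{
	return 1UL << ((level - 1) * MOCK_LEVEL_STRIDE);
}

static unsigned long mock_level_mask(int level)
{
	return ~(mock_level_size(level) - 1);
}

static unsigned long mock_align_to_level(unsigned long pfn, int level)
{
	return (pfn + mock_level_size(level) - 1) & mock_level_mask(level);
}

int main(void)
{
	/* No level-1 table under pfn 0x212: skip ahead to the next
	 * level-2 boundary instead of visiting every missing pfn. */
	printf("align_to_level(0x212 + 1, 2) = 0x%lx\n",
	       mock_align_to_level(0x212 + 1, 2));	/* prints 0x400 */
	return 0;
}
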
@@ -809,7 +870,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
809static void iommu_set_root_entry(struct intel_iommu *iommu) 870static void iommu_set_root_entry(struct intel_iommu *iommu)
810{ 871{
811 void *addr; 872 void *addr;
812 u32 cmd, sts; 873 u32 sts;
813 unsigned long flag; 874 unsigned long flag;
814 875
815 addr = iommu->root_entry; 876 addr = iommu->root_entry;
@@ -817,12 +878,11 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
817 spin_lock_irqsave(&iommu->register_lock, flag); 878 spin_lock_irqsave(&iommu->register_lock, flag);
818 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr)); 879 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
819 880
820 cmd = iommu->gcmd | DMA_GCMD_SRTP; 881 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
821 writel(cmd, iommu->reg + DMAR_GCMD_REG);
822 882
823 /* Make sure hardware complete it */ 883 /* Make sure hardware complete it */
824 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 884 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
825 readl, (sts & DMA_GSTS_RTPS), sts); 885 readl, (sts & DMA_GSTS_RTPS), sts);
826 886
827 spin_unlock_irqrestore(&iommu->register_lock, flag); 887 spin_unlock_irqrestore(&iommu->register_lock, flag);
828} 888}
@@ -834,39 +894,25 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu)
834 894
835 if (!rwbf_quirk && !cap_rwbf(iommu->cap)) 895 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
836 return; 896 return;
837 val = iommu->gcmd | DMA_GCMD_WBF;
838 897
839 spin_lock_irqsave(&iommu->register_lock, flag); 898 spin_lock_irqsave(&iommu->register_lock, flag);
840 writel(val, iommu->reg + DMAR_GCMD_REG); 899 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
841 900
842 /* Make sure hardware complete it */ 901 /* Make sure hardware complete it */
843 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 902 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
844 readl, (!(val & DMA_GSTS_WBFS)), val); 903 readl, (!(val & DMA_GSTS_WBFS)), val);
845 904
846 spin_unlock_irqrestore(&iommu->register_lock, flag); 905 spin_unlock_irqrestore(&iommu->register_lock, flag);
847} 906}
848 907
849/* return value determine if we need a write buffer flush */ 908/* return value determine if we need a write buffer flush */
850static int __iommu_flush_context(struct intel_iommu *iommu, 909static void __iommu_flush_context(struct intel_iommu *iommu,
851 u16 did, u16 source_id, u8 function_mask, u64 type, 910 u16 did, u16 source_id, u8 function_mask,
852 int non_present_entry_flush) 911 u64 type)
853{ 912{
854 u64 val = 0; 913 u64 val = 0;
855 unsigned long flag; 914 unsigned long flag;
856 915
857 /*
858 * In the non-present entry flush case, if hardware doesn't cache
859 * non-present entry we do nothing and if hardware cache non-present
860 * entry, we flush entries of domain 0 (the domain id is used to cache
861 * any non-present entries)
862 */
863 if (non_present_entry_flush) {
864 if (!cap_caching_mode(iommu->cap))
865 return 1;
866 else
867 did = 0;
868 }
869
870 switch (type) { 916 switch (type) {
871 case DMA_CCMD_GLOBAL_INVL: 917 case DMA_CCMD_GLOBAL_INVL:
872 val = DMA_CCMD_GLOBAL_INVL; 918 val = DMA_CCMD_GLOBAL_INVL;
@@ -891,33 +937,16 @@ static int __iommu_flush_context(struct intel_iommu *iommu,
891 dmar_readq, (!(val & DMA_CCMD_ICC)), val); 937 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
892 938
893 spin_unlock_irqrestore(&iommu->register_lock, flag); 939 spin_unlock_irqrestore(&iommu->register_lock, flag);
894
895 /* flush context entry will implicitly flush write buffer */
896 return 0;
897} 940}
898 941
899/* return value determine if we need a write buffer flush */ 942/* return value determine if we need a write buffer flush */
900static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, 943static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
901 u64 addr, unsigned int size_order, u64 type, 944 u64 addr, unsigned int size_order, u64 type)
902 int non_present_entry_flush)
903{ 945{
904 int tlb_offset = ecap_iotlb_offset(iommu->ecap); 946 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
905 u64 val = 0, val_iva = 0; 947 u64 val = 0, val_iva = 0;
906 unsigned long flag; 948 unsigned long flag;
907 949
908 /*
909 * In the non-present entry flush case, if hardware doesn't cache
910 * non-present entry we do nothing and if hardware cache non-present
911 * entry, we flush entries of domain 0 (the domain id is used to cache
912 * any non-present entries)
913 */
914 if (non_present_entry_flush) {
915 if (!cap_caching_mode(iommu->cap))
916 return 1;
917 else
918 did = 0;
919 }
920
921 switch (type) { 950 switch (type) {
922 case DMA_TLB_GLOBAL_FLUSH: 951 case DMA_TLB_GLOBAL_FLUSH:
923 /* global flush doesn't need set IVA_REG */ 952 /* global flush doesn't need set IVA_REG */
@@ -965,37 +994,106 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
965 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", 994 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
966 (unsigned long long)DMA_TLB_IIRG(type), 995 (unsigned long long)DMA_TLB_IIRG(type),
967 (unsigned long long)DMA_TLB_IAIG(val)); 996 (unsigned long long)DMA_TLB_IAIG(val));
968 /* flush iotlb entry will implicitly flush write buffer */
969 return 0;
970} 997}
971 998
972static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, 999static struct device_domain_info *iommu_support_dev_iotlb(
973 u64 addr, unsigned int pages, int non_present_entry_flush) 1000 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
974{ 1001{
975 unsigned int mask; 1002 int found = 0;
1003 unsigned long flags;
1004 struct device_domain_info *info;
1005 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
976 1006
977 BUG_ON(addr & (~VTD_PAGE_MASK)); 1007 if (!ecap_dev_iotlb_support(iommu->ecap))
978 BUG_ON(pages == 0); 1008 return NULL;
1009
1010 if (!iommu->qi)
1011 return NULL;
1012
1013 spin_lock_irqsave(&device_domain_lock, flags);
1014 list_for_each_entry(info, &domain->devices, link)
1015 if (info->bus == bus && info->devfn == devfn) {
1016 found = 1;
1017 break;
1018 }
1019 spin_unlock_irqrestore(&device_domain_lock, flags);
1020
1021 if (!found || !info->dev)
1022 return NULL;
1023
1024 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1025 return NULL;
1026
1027 if (!dmar_find_matched_atsr_unit(info->dev))
1028 return NULL;
1029
1030 info->iommu = iommu;
1031
1032 return info;
1033}
1034
1035static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1036{
1037 if (!info)
1038 return;
1039
1040 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1041}
1042
1043static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1044{
1045 if (!info->dev || !pci_ats_enabled(info->dev))
1046 return;
1047
1048 pci_disable_ats(info->dev);
1049}
1050
1051static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1052 u64 addr, unsigned mask)
1053{
1054 u16 sid, qdep;
1055 unsigned long flags;
1056 struct device_domain_info *info;
1057
1058 spin_lock_irqsave(&device_domain_lock, flags);
1059 list_for_each_entry(info, &domain->devices, link) {
1060 if (!info->dev || !pci_ats_enabled(info->dev))
1061 continue;
1062
1063 sid = info->bus << 8 | info->devfn;
1064 qdep = pci_ats_queue_depth(info->dev);
1065 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1066 }
1067 spin_unlock_irqrestore(&device_domain_lock, flags);
1068}
979 1069
980 /* Fallback to domain selective flush if no PSI support */ 1070static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
981 if (!cap_pgsel_inv(iommu->cap)) 1071 unsigned long pfn, unsigned int pages)
982 return iommu->flush.flush_iotlb(iommu, did, 0, 0, 1072{
983 DMA_TLB_DSI_FLUSH, 1073 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
984 non_present_entry_flush); 1074 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1075
1076 BUG_ON(pages == 0);
985 1077
986 /* 1078 /*
1079 * Fallback to domain selective flush if no PSI support or the size is
1080 * too big.
987 * PSI requires page size to be 2 ^ x, and the base address is naturally 1081 * PSI requires page size to be 2 ^ x, and the base address is naturally
988 * aligned to the size 1082 * aligned to the size
989 */ 1083 */
990 mask = ilog2(__roundup_pow_of_two(pages)); 1084 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
991 /* Fallback to domain selective flush if size is too big */ 1085 iommu->flush.flush_iotlb(iommu, did, 0, 0,
992 if (mask > cap_max_amask_val(iommu->cap)) 1086 DMA_TLB_DSI_FLUSH);
993 return iommu->flush.flush_iotlb(iommu, did, 0, 0, 1087 else
994 DMA_TLB_DSI_FLUSH, non_present_entry_flush); 1088 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1089 DMA_TLB_PSI_FLUSH);
995 1090
996 return iommu->flush.flush_iotlb(iommu, did, addr, mask, 1091 /*
997 DMA_TLB_PSI_FLUSH, 1092 * In caching mode, domain ID 0 is reserved for non-present to present
998 non_present_entry_flush); 1093 * mapping flush. Device IOTLB doesn't need to be flushed in this case.
1094 */
1095 if (!cap_caching_mode(iommu->cap) || did)
1096 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
999} 1097}
1000 1098
1001static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) 1099static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
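
The reworked iommu_flush_iotlb_psi() above derives the PSI address mask from the page count: the hardware invalidates a naturally aligned block of 2^mask pages, so the count is rounded up to a power of two and converted to a shift, and the code falls back to a domain-selective flush when that shift exceeds cap_max_amask_val(). A standalone sketch of the mask computation, with a plain loop standing in for ilog2(__roundup_pow_of_two(pages)):

#include <stdio.h>

static unsigned int mock_psi_mask(unsigned long pages)
{
	unsigned int mask = 0;

	while ((1UL << mask) < pages)	/* smallest mask with 2^mask >= pages */
		mask++;
	return mask;
}

int main(void)
{
	unsigned long pages;

	/* 1 page -> mask 0, 2 -> 1, 3..4 -> 2, 5..8 -> 3, ... */
	for (pages = 1; pages <= 8; pages++)
		printf("pages=%lu  mask=%u  (invalidates %lu pages)\n",
		       pages, mock_psi_mask(pages), 1UL << mock_psi_mask(pages));
	return 0;
}
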
@@ -1021,13 +1119,13 @@ static int iommu_enable_translation(struct intel_iommu *iommu)
1021 unsigned long flags; 1119 unsigned long flags;
1022 1120
1023 spin_lock_irqsave(&iommu->register_lock, flags); 1121 spin_lock_irqsave(&iommu->register_lock, flags);
1024 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG); 1122 iommu->gcmd |= DMA_GCMD_TE;
1123 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1025 1124
1026 /* Make sure hardware complete it */ 1125 /* Make sure hardware complete it */
1027 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1126 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1028 readl, (sts & DMA_GSTS_TES), sts); 1127 readl, (sts & DMA_GSTS_TES), sts);
1029 1128
1030 iommu->gcmd |= DMA_GCMD_TE;
1031 spin_unlock_irqrestore(&iommu->register_lock, flags); 1129 spin_unlock_irqrestore(&iommu->register_lock, flags);
1032 return 0; 1130 return 0;
1033} 1131}
@@ -1043,7 +1141,7 @@ static int iommu_disable_translation(struct intel_iommu *iommu)
1043 1141
1044 /* Make sure hardware complete it */ 1142 /* Make sure hardware complete it */
1045 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1143 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1046 readl, (!(sts & DMA_GSTS_TES)), sts); 1144 readl, (!(sts & DMA_GSTS_TES)), sts);
1047 1145
1048 spin_unlock_irqrestore(&iommu->register_lock, flag); 1146 spin_unlock_irqrestore(&iommu->register_lock, flag);
1049 return 0; 1147 return 0;
@@ -1142,48 +1240,71 @@ void free_dmar_iommu(struct intel_iommu *iommu)
1142 free_context_table(iommu); 1240 free_context_table(iommu);
1143} 1241}
1144 1242
1145static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) 1243static struct dmar_domain *alloc_domain(void)
1146{ 1244{
1147 unsigned long num;
1148 unsigned long ndomains;
1149 struct dmar_domain *domain; 1245 struct dmar_domain *domain;
1150 unsigned long flags;
1151 1246
1152 domain = alloc_domain_mem(); 1247 domain = alloc_domain_mem();
1153 if (!domain) 1248 if (!domain)
1154 return NULL; 1249 return NULL;
1155 1250
1251 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1252 domain->flags = 0;
1253
1254 return domain;
1255}
1256
1257static int iommu_attach_domain(struct dmar_domain *domain,
1258 struct intel_iommu *iommu)
1259{
1260 int num;
1261 unsigned long ndomains;
1262 unsigned long flags;
1263
1156 ndomains = cap_ndoms(iommu->cap); 1264 ndomains = cap_ndoms(iommu->cap);
1157 1265
1158 spin_lock_irqsave(&iommu->lock, flags); 1266 spin_lock_irqsave(&iommu->lock, flags);
1267
1159 num = find_first_zero_bit(iommu->domain_ids, ndomains); 1268 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1160 if (num >= ndomains) { 1269 if (num >= ndomains) {
1161 spin_unlock_irqrestore(&iommu->lock, flags); 1270 spin_unlock_irqrestore(&iommu->lock, flags);
1162 free_domain_mem(domain);
1163 printk(KERN_ERR "IOMMU: no free domain ids\n"); 1271 printk(KERN_ERR "IOMMU: no free domain ids\n");
1164 return NULL; 1272 return -ENOMEM;
1165 } 1273 }
1166 1274
1167 set_bit(num, iommu->domain_ids);
1168 domain->id = num; 1275 domain->id = num;
1169 memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); 1276 set_bit(num, iommu->domain_ids);
1170 set_bit(iommu->seq_id, &domain->iommu_bmp); 1277 set_bit(iommu->seq_id, &domain->iommu_bmp);
1171 domain->flags = 0;
1172 iommu->domains[num] = domain; 1278 iommu->domains[num] = domain;
1173 spin_unlock_irqrestore(&iommu->lock, flags); 1279 spin_unlock_irqrestore(&iommu->lock, flags);
1174 1280
1175 return domain; 1281 return 0;
1176} 1282}
1177 1283
1178static void iommu_free_domain(struct dmar_domain *domain) 1284static void iommu_detach_domain(struct dmar_domain *domain,
1285 struct intel_iommu *iommu)
1179{ 1286{
1180 unsigned long flags; 1287 unsigned long flags;
1181 struct intel_iommu *iommu; 1288 int num, ndomains;
1182 1289 int found = 0;
1183 iommu = domain_get_iommu(domain);
1184 1290
1185 spin_lock_irqsave(&iommu->lock, flags); 1291 spin_lock_irqsave(&iommu->lock, flags);
1186 clear_bit(domain->id, iommu->domain_ids); 1292 ndomains = cap_ndoms(iommu->cap);
1293 num = find_first_bit(iommu->domain_ids, ndomains);
1294 for (; num < ndomains; ) {
1295 if (iommu->domains[num] == domain) {
1296 found = 1;
1297 break;
1298 }
1299 num = find_next_bit(iommu->domain_ids,
1300 cap_ndoms(iommu->cap), num+1);
1301 }
1302
1303 if (found) {
1304 clear_bit(num, iommu->domain_ids);
1305 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1306 iommu->domains[num] = NULL;
1307 }
1187 spin_unlock_irqrestore(&iommu->lock, flags); 1308 spin_unlock_irqrestore(&iommu->lock, flags);
1188} 1309}
1189 1310
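
iommu_attach_domain()/iommu_detach_domain() above split domain allocation from domain-id assignment: each IOMMU hands out ids from its own domain_ids bitmap, so a domain attached to several IOMMUs (such as si_domain) may be known by a different id on each. A standalone sketch of the id bitmap discipline, with a single word standing in for the per-IOMMU bitmap and no locking:

#include <stdio.h>

#define MOCK_NDOMAINS 16

static unsigned long mock_domain_ids;	/* bit n set => id n in use */

static int mock_attach_domain(void)
{
	int num;

	for (num = 0; num < MOCK_NDOMAINS; num++)
		if (!(mock_domain_ids & (1UL << num))) {
			mock_domain_ids |= 1UL << num;	/* claim the id */
			return num;
		}
	return -1;				/* no free domain ids */
}

static void mock_detach_domain(int num)
{
	mock_domain_ids &= ~(1UL << num);	/* release the id */
}

int main(void)
{
	int a = mock_attach_domain(), b = mock_attach_domain();

	printf("attached ids %d and %d\n", a, b);		/* 0 and 1 */
	mock_detach_domain(a);
	printf("next id after detach: %d\n", mock_attach_domain());	/* 0 */
	return 0;
}
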
@@ -1196,7 +1317,6 @@ static void dmar_init_reserved_ranges(void)
1196 struct pci_dev *pdev = NULL; 1317 struct pci_dev *pdev = NULL;
1197 struct iova *iova; 1318 struct iova *iova;
1198 int i; 1319 int i;
1199 u64 addr, size;
1200 1320
1201 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); 1321 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1202 1322
@@ -1219,12 +1339,9 @@ static void dmar_init_reserved_ranges(void)
1219 r = &pdev->resource[i]; 1339 r = &pdev->resource[i];
1220 if (!r->flags || !(r->flags & IORESOURCE_MEM)) 1340 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1221 continue; 1341 continue;
1222 addr = r->start; 1342 iova = reserve_iova(&reserved_iova_list,
1223 addr &= PHYSICAL_PAGE_MASK; 1343 IOVA_PFN(r->start),
1224 size = r->end - addr; 1344 IOVA_PFN(r->end));
1225 size = PAGE_ALIGN(size);
1226 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1227 IOVA_PFN(size + addr) - 1);
1228 if (!iova) 1345 if (!iova)
1229 printk(KERN_ERR "Reserve iova failed\n"); 1346 printk(KERN_ERR "Reserve iova failed\n");
1230 } 1347 }
@@ -1258,7 +1375,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1258 unsigned long sagaw; 1375 unsigned long sagaw;
1259 1376
1260 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 1377 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1261 spin_lock_init(&domain->mapping_lock);
1262 spin_lock_init(&domain->iommu_lock); 1378 spin_lock_init(&domain->iommu_lock);
1263 1379
1264 domain_reserve_special_ranges(domain); 1380 domain_reserve_special_ranges(domain);
@@ -1303,7 +1419,8 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1303 1419
1304static void domain_exit(struct dmar_domain *domain) 1420static void domain_exit(struct dmar_domain *domain)
1305{ 1421{
1306 u64 end; 1422 struct dmar_drhd_unit *drhd;
1423 struct intel_iommu *iommu;
1307 1424
1308 /* Domain 0 is reserved, so dont process it */ 1425 /* Domain 0 is reserved, so dont process it */
1309 if (!domain) 1426 if (!domain)
@@ -1312,21 +1429,22 @@ static void domain_exit(struct dmar_domain *domain)
1312 domain_remove_dev_info(domain); 1429 domain_remove_dev_info(domain);
1313 /* destroy iovas */ 1430 /* destroy iovas */
1314 put_iova_domain(&domain->iovad); 1431 put_iova_domain(&domain->iovad);
1315 end = DOMAIN_MAX_ADDR(domain->gaw);
1316 end = end & (~PAGE_MASK);
1317 1432
1318 /* clear ptes */ 1433 /* clear ptes */
1319 dma_pte_clear_range(domain, 0, end); 1434 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1320 1435
1321 /* free page tables */ 1436 /* free page tables */
1322 dma_pte_free_pagetable(domain, 0, end); 1437 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1438
1439 for_each_active_iommu(iommu, drhd)
1440 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1441 iommu_detach_domain(domain, iommu);
1323 1442
1324 iommu_free_domain(domain);
1325 free_domain_mem(domain); 1443 free_domain_mem(domain);
1326} 1444}
1327 1445
1328static int domain_context_mapping_one(struct dmar_domain *domain, 1446static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1329 int segment, u8 bus, u8 devfn) 1447 u8 bus, u8 devfn, int translation)
1330{ 1448{
1331 struct context_entry *context; 1449 struct context_entry *context;
1332 unsigned long flags; 1450 unsigned long flags;
@@ -1336,10 +1454,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1336 unsigned long ndomains; 1454 unsigned long ndomains;
1337 int id; 1455 int id;
1338 int agaw; 1456 int agaw;
1457 struct device_domain_info *info = NULL;
1339 1458
1340 pr_debug("Set context mapping for %02x:%02x.%d\n", 1459 pr_debug("Set context mapping for %02x:%02x.%d\n",
1341 bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1460 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1461
1342 BUG_ON(!domain->pgd); 1462 BUG_ON(!domain->pgd);
1463 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1464 translation != CONTEXT_TT_MULTI_LEVEL);
1343 1465
1344 iommu = device_to_iommu(segment, bus, devfn); 1466 iommu = device_to_iommu(segment, bus, devfn);
1345 if (!iommu) 1467 if (!iommu)
@@ -1357,7 +1479,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1357 id = domain->id; 1479 id = domain->id;
1358 pgd = domain->pgd; 1480 pgd = domain->pgd;
1359 1481
1360 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { 1482 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1483 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1361 int found = 0; 1484 int found = 0;
1362 1485
1363 /* find an available domain id for this device in iommu */ 1486 /* find an available domain id for this device in iommu */
@@ -1382,6 +1505,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1382 } 1505 }
1383 1506
1384 set_bit(num, iommu->domain_ids); 1507 set_bit(num, iommu->domain_ids);
1508 set_bit(iommu->seq_id, &domain->iommu_bmp);
1385 iommu->domains[num] = domain; 1509 iommu->domains[num] = domain;
1386 id = num; 1510 id = num;
1387 } 1511 }
@@ -1399,21 +1523,44 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1399 } 1523 }
1400 1524
1401 context_set_domain_id(context, id); 1525 context_set_domain_id(context, id);
1402 context_set_address_width(context, iommu->agaw); 1526
1403 context_set_address_root(context, virt_to_phys(pgd)); 1527 if (translation != CONTEXT_TT_PASS_THROUGH) {
1404 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL); 1528 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1529 translation = info ? CONTEXT_TT_DEV_IOTLB :
1530 CONTEXT_TT_MULTI_LEVEL;
1531 }
1532 /*
1533 * In pass through mode, AW must be programmed to indicate the largest
1534 * AGAW value supported by hardware. And ASR is ignored by hardware.
1535 */
1536 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1537 context_set_address_width(context, iommu->msagaw);
1538 else {
1539 context_set_address_root(context, virt_to_phys(pgd));
1540 context_set_address_width(context, iommu->agaw);
1541 }
1542
1543 context_set_translation_type(context, translation);
1405 context_set_fault_enable(context); 1544 context_set_fault_enable(context);
1406 context_set_present(context); 1545 context_set_present(context);
1407 domain_flush_cache(domain, context, sizeof(*context)); 1546 domain_flush_cache(domain, context, sizeof(*context));
1408 1547
1409 /* it's a non-present to present mapping */ 1548 /*
1410 if (iommu->flush.flush_context(iommu, domain->id, 1549 * It's a non-present to present mapping. If hardware doesn't cache
1411 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1550 * non-present entries, we only need to flush the write-buffer. If the
1412 DMA_CCMD_DEVICE_INVL, 1)) 1551 * hardware _does_ cache non-present entries, then it does so in the special
1552 * domain #0, which we have to flush:
1553 */
1554 if (cap_caching_mode(iommu->cap)) {
1555 iommu->flush.flush_context(iommu, 0,
1556 (((u16)bus) << 8) | devfn,
1557 DMA_CCMD_MASK_NOBIT,
1558 DMA_CCMD_DEVICE_INVL);
1559 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
1560 } else {
1413 iommu_flush_write_buffer(iommu); 1561 iommu_flush_write_buffer(iommu);
1414 else 1562 }
1415 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); 1563 iommu_enable_dev_iotlb(info);
1416
1417 spin_unlock_irqrestore(&iommu->lock, flags); 1564 spin_unlock_irqrestore(&iommu->lock, flags);
1418 1565
1419 spin_lock_irqsave(&domain->iommu_lock, flags); 1566 spin_lock_irqsave(&domain->iommu_lock, flags);
@@ -1426,13 +1573,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1426} 1573}
1427 1574
1428static int 1575static int
1429domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) 1576domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1577 int translation)
1430{ 1578{
1431 int ret; 1579 int ret;
1432 struct pci_dev *tmp, *parent; 1580 struct pci_dev *tmp, *parent;
1433 1581
1434 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus), 1582 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1435 pdev->bus->number, pdev->devfn); 1583 pdev->bus->number, pdev->devfn,
1584 translation);
1436 if (ret) 1585 if (ret)
1437 return ret; 1586 return ret;
1438 1587
@@ -1446,7 +1595,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1446 ret = domain_context_mapping_one(domain, 1595 ret = domain_context_mapping_one(domain,
1447 pci_domain_nr(parent->bus), 1596 pci_domain_nr(parent->bus),
1448 parent->bus->number, 1597 parent->bus->number,
1449 parent->devfn); 1598 parent->devfn, translation);
1450 if (ret) 1599 if (ret)
1451 return ret; 1600 return ret;
1452 parent = parent->bus->self; 1601 parent = parent->bus->self;
@@ -1454,12 +1603,14 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1454 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ 1603 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1455 return domain_context_mapping_one(domain, 1604 return domain_context_mapping_one(domain,
1456 pci_domain_nr(tmp->subordinate), 1605 pci_domain_nr(tmp->subordinate),
1457 tmp->subordinate->number, 0); 1606 tmp->subordinate->number, 0,
1607 translation);
1458 else /* this is a legacy PCI bridge */ 1608 else /* this is a legacy PCI bridge */
1459 return domain_context_mapping_one(domain, 1609 return domain_context_mapping_one(domain,
1460 pci_domain_nr(tmp->bus), 1610 pci_domain_nr(tmp->bus),
1461 tmp->bus->number, 1611 tmp->bus->number,
1462 tmp->devfn); 1612 tmp->devfn,
1613 translation);
1463} 1614}
1464 1615
1465static int domain_context_mapped(struct pci_dev *pdev) 1616static int domain_context_mapped(struct pci_dev *pdev)
@@ -1497,42 +1648,86 @@ static int domain_context_mapped(struct pci_dev *pdev)
1497 tmp->devfn); 1648 tmp->devfn);
1498} 1649}
1499 1650
1500static int 1651static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1501domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, 1652 struct scatterlist *sg, unsigned long phys_pfn,
1502 u64 hpa, size_t size, int prot) 1653 unsigned long nr_pages, int prot)
1503{ 1654{
1504 u64 start_pfn, end_pfn; 1655 struct dma_pte *first_pte = NULL, *pte = NULL;
1505 struct dma_pte *pte; 1656 phys_addr_t uninitialized_var(pteval);
1506 int index; 1657 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1507 int addr_width = agaw_to_width(domain->agaw); 1658 unsigned long sg_res;
1508 1659
1509 hpa &= (((u64)1) << addr_width) - 1; 1660 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1510 1661
1511 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) 1662 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1512 return -EINVAL; 1663 return -EINVAL;
1513 iova &= PAGE_MASK; 1664
1514 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT; 1665 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1515 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT; 1666
1516 index = 0; 1667 if (sg)
1517 while (start_pfn < end_pfn) { 1668 sg_res = 0;
1518 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index); 1669 else {
1519 if (!pte) 1670 sg_res = nr_pages + 1;
1520 return -ENOMEM; 1671 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1672 }
1673
1674 while (nr_pages--) {
1675 uint64_t tmp;
1676
1677 if (!sg_res) {
1678 sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;
1679 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1680 sg->dma_length = sg->length;
1681 pteval = page_to_phys(sg_page(sg)) | prot;
1682 }
1683 if (!pte) {
1684 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
1685 if (!pte)
1686 return -ENOMEM;
1687 }
1521 /* We don't need lock here, nobody else 1688 /* We don't need lock here, nobody else
1522 * touches the iova range 1689 * touches the iova range
1523 */ 1690 */
1524 BUG_ON(dma_pte_addr(pte)); 1691 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1525 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); 1692 if (tmp) {
1526 dma_set_pte_prot(pte, prot); 1693 static int dumps = 5;
1527 if (prot & DMA_PTE_SNP) 1694 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1528 dma_set_pte_snp(pte); 1695 iov_pfn, tmp, (unsigned long long)pteval);
1529 domain_flush_cache(domain, pte, sizeof(*pte)); 1696 if (dumps) {
1530 start_pfn++; 1697 dumps--;
1531 index++; 1698 debug_dma_dump_mappings(NULL);
1699 }
1700 WARN_ON(1);
1701 }
1702 pte++;
1703 if (!nr_pages || first_pte_in_page(pte)) {
1704 domain_flush_cache(domain, first_pte,
1705 (void *)pte - (void *)first_pte);
1706 pte = NULL;
1707 }
1708 iov_pfn++;
1709 pteval += VTD_PAGE_SIZE;
1710 sg_res--;
1711 if (!sg_res)
1712 sg = sg_next(sg);
1532 } 1713 }
1533 return 0; 1714 return 0;
1534} 1715}
1535 1716
1717static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1718 struct scatterlist *sg, unsigned long nr_pages,
1719 int prot)
1720{
1721 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1722}
1723
1724static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1725 unsigned long phys_pfn, unsigned long nr_pages,
1726 int prot)
1727{
1728 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1729}
1730
1536static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) 1731static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1537{ 1732{
1538 if (!iommu) 1733 if (!iommu)
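
In __domain_mapping() above, sg_res counts how many PTEs the current scatterlist entry still needs; the loop advances to the next entry only when that budget reaches zero, so a single pass writes PTEs for the whole list. A standalone sketch of that bookkeeping with a mock two-entry scatterlist and 4KiB VT-d pages:

#include <stdio.h>

#define MOCK_VTD_PAGE_SIZE 4096UL

struct mock_sg { unsigned long offset, length; };

int main(void)
{
	struct mock_sg sglist[] = { { 0x100, 0x2000 }, { 0, 0x1000 } };
	unsigned long iov_pfn = 0x1000, sg_res = 0;
	unsigned int i = 0;

	while (i < 2) {
		if (!sg_res) {
			/* pages covered by this entry, as in the patch:
			 * (offset + length + page size - 1) >> VTD_PAGE_SHIFT */
			sg_res = (sglist[i].offset + sglist[i].length +
				  MOCK_VTD_PAGE_SIZE - 1) / MOCK_VTD_PAGE_SIZE;
			printf("sg[%u] needs %lu PTEs starting at iov pfn 0x%lx\n",
			       i, sg_res, iov_pfn);
		}
		/* one PTE would be written here */
		iov_pfn++;
		if (!--sg_res)
			i++;
	}
	return 0;
}
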
@@ -1540,9 +1735,8 @@ static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1540 1735
1541 clear_context_table(iommu, bus, devfn); 1736 clear_context_table(iommu, bus, devfn);
1542 iommu->flush.flush_context(iommu, 0, 0, 0, 1737 iommu->flush.flush_context(iommu, 0, 0, 0,
1543 DMA_CCMD_GLOBAL_INVL, 0); 1738 DMA_CCMD_GLOBAL_INVL);
1544 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 1739 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1545 DMA_TLB_GLOBAL_FLUSH, 0);
1546} 1740}
1547 1741
1548static void domain_remove_dev_info(struct dmar_domain *domain) 1742static void domain_remove_dev_info(struct dmar_domain *domain)
@@ -1561,6 +1755,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
1561 info->dev->dev.archdata.iommu = NULL; 1755 info->dev->dev.archdata.iommu = NULL;
1562 spin_unlock_irqrestore(&device_domain_lock, flags); 1756 spin_unlock_irqrestore(&device_domain_lock, flags);
1563 1757
1758 iommu_disable_dev_iotlb(info);
1564 iommu = device_to_iommu(info->segment, info->bus, info->devfn); 1759 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1565 iommu_detach_dev(iommu, info->bus, info->devfn); 1760 iommu_detach_dev(iommu, info->bus, info->devfn);
1566 free_devinfo_mem(info); 1761 free_devinfo_mem(info);
@@ -1597,6 +1792,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1597 unsigned long flags; 1792 unsigned long flags;
1598 int bus = 0, devfn = 0; 1793 int bus = 0, devfn = 0;
1599 int segment; 1794 int segment;
1795 int ret;
1600 1796
1601 domain = find_domain(pdev); 1797 domain = find_domain(pdev);
1602 if (domain) 1798 if (domain)
@@ -1629,6 +1825,10 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1629 } 1825 }
1630 } 1826 }
1631 1827
1828 domain = alloc_domain();
1829 if (!domain)
1830 goto error;
1831
1632 /* Allocate new domain for the device */ 1832 /* Allocate new domain for the device */
1633 drhd = dmar_find_matched_drhd_unit(pdev); 1833 drhd = dmar_find_matched_drhd_unit(pdev);
1634 if (!drhd) { 1834 if (!drhd) {
@@ -1638,9 +1838,11 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1638 } 1838 }
1639 iommu = drhd->iommu; 1839 iommu = drhd->iommu;
1640 1840
1641 domain = iommu_alloc_domain(iommu); 1841 ret = iommu_attach_domain(domain, iommu);
1642 if (!domain) 1842 if (ret) {
1843 domain_exit(domain);
1643 goto error; 1844 goto error;
1845 }
1644 1846
1645 if (domain_init(domain, gaw)) { 1847 if (domain_init(domain, gaw)) {
1646 domain_exit(domain); 1848 domain_exit(domain);
@@ -1714,55 +1916,63 @@ error:
1714 return find_domain(pdev); 1916 return find_domain(pdev);
1715} 1917}
1716 1918
1919static int iommu_identity_mapping;
1920
1921static int iommu_domain_identity_map(struct dmar_domain *domain,
1922 unsigned long long start,
1923 unsigned long long end)
1924{
1925 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
1926 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
1927
1928 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
1929 dma_to_mm_pfn(last_vpfn))) {
1930 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1931 return -ENOMEM;
1932 }
1933
1934 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1935 start, end, domain->id);
1936 /*
1937 * RMRR range might have overlap with physical memory range,
1938 * clear it first
1939 */
1940 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
1941
1942 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
1943 last_vpfn - first_vpfn + 1,
1944 DMA_PTE_READ|DMA_PTE_WRITE);
1945}
1946
1717static int iommu_prepare_identity_map(struct pci_dev *pdev, 1947static int iommu_prepare_identity_map(struct pci_dev *pdev,
1718 unsigned long long start, 1948 unsigned long long start,
1719 unsigned long long end) 1949 unsigned long long end)
1720{ 1950{
1721 struct dmar_domain *domain; 1951 struct dmar_domain *domain;
1722 unsigned long size;
1723 unsigned long long base;
1724 int ret; 1952 int ret;
1725 1953
1726 printk(KERN_INFO 1954 printk(KERN_INFO
1727 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", 1955 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1728 pci_name(pdev), start, end); 1956 pci_name(pdev), start, end);
1729 /* page table init */ 1957
1730 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); 1958 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1731 if (!domain) 1959 if (!domain)
1732 return -ENOMEM; 1960 return -ENOMEM;
1733 1961
1734 /* The address might not be aligned */ 1962 ret = iommu_domain_identity_map(domain, start, end);
1735 base = start & PAGE_MASK; 1963 if (ret)
1736 size = end - base;
1737 size = PAGE_ALIGN(size);
1738 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1739 IOVA_PFN(base + size) - 1)) {
1740 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1741 ret = -ENOMEM;
1742 goto error; 1964 goto error;
1743 }
1744 1965
1745 pr_debug("Mapping reserved region %lx@%llx for %s\n", 1966 /* context entry init */
1746 size, base, pci_name(pdev)); 1967 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
1747 /*
1748 * RMRR range might have overlap with physical memory range,
1749 * clear it first
1750 */
1751 dma_pte_clear_range(domain, base, base + size);
1752
1753 ret = domain_page_mapping(domain, base, base, size,
1754 DMA_PTE_READ|DMA_PTE_WRITE);
1755 if (ret) 1968 if (ret)
1756 goto error; 1969 goto error;
1757 1970
1758 /* context entry init */ 1971 return 0;
1759 ret = domain_context_mapping(domain, pdev); 1972
1760 if (!ret) 1973 error:
1761 return 0;
1762error:
1763 domain_exit(domain); 1974 domain_exit(domain);
1764 return ret; 1975 return ret;
1765
1766} 1976}
1767 1977
1768static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, 1978static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
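
iommu_prepare_identity_map() above now delegates to iommu_domain_identity_map(), which works purely in VT-d pfns: the byte range is shifted down to first/last vpfn, the matching iova range is reserved, and the same pfns are mapped 1:1. A short standalone sketch of the bound arithmetic for a hypothetical 8MiB range, assuming 4KiB VT-d pages:

#include <stdio.h>

#define MOCK_VTD_PAGE_SHIFT 12

int main(void)
{
	unsigned long long start = 0xbf000000ULL, end = 0xbf7fffffULL;
	unsigned long first_vpfn = start >> MOCK_VTD_PAGE_SHIFT;
	unsigned long last_vpfn = end >> MOCK_VTD_PAGE_SHIFT;

	/* Identity mapping: iova pfn == physical pfn for every page */
	printf("map pfns 0x%lx..0x%lx (%lu pages) 1:1\n",
	       first_vpfn, last_vpfn, last_vpfn - first_vpfn + 1);
	return 0;
}
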
@@ -1774,96 +1984,179 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1774 rmrr->end_address + 1); 1984 rmrr->end_address + 1);
1775} 1985}
1776 1986
1777#ifdef CONFIG_DMAR_GFX_WA 1987#ifdef CONFIG_DMAR_FLOPPY_WA
1778struct iommu_prepare_data { 1988static inline void iommu_prepare_isa(void)
1989{
1779 struct pci_dev *pdev; 1990 struct pci_dev *pdev;
1780 int ret; 1991 int ret;
1781};
1782 1992
1783static int __init iommu_prepare_work_fn(unsigned long start_pfn, 1993 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1784 unsigned long end_pfn, void *datax) 1994 if (!pdev)
1785{ 1995 return;
1786 struct iommu_prepare_data *data;
1787 1996
1788 data = (struct iommu_prepare_data *)datax; 1997 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
1998 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1789 1999
1790 data->ret = iommu_prepare_identity_map(data->pdev, 2000 if (ret)
1791 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); 2001 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
1792 return data->ret; 2002 "floppy might not work\n");
1793 2003
1794} 2004}
1795 2005#else
1796static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev) 2006static inline void iommu_prepare_isa(void)
1797{ 2007{
1798 int nid; 2008 return;
1799 struct iommu_prepare_data data;
1800
1801 data.pdev = pdev;
1802 data.ret = 0;
1803
1804 for_each_online_node(nid) {
1805 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1806 if (data.ret)
1807 return data.ret;
1808 }
1809 return data.ret;
1810} 2009}
2010#endif /* !CONFIG_DMAR_FLOPPY_WA */
1811 2011
1812static void __init iommu_prepare_gfx_mapping(void) 2012/* Initialize each context entry as pass through.*/
2013static int __init init_context_pass_through(void)
1813{ 2014{
1814 struct pci_dev *pdev = NULL; 2015 struct pci_dev *pdev = NULL;
2016 struct dmar_domain *domain;
1815 int ret; 2017 int ret;
1816 2018
1817 for_each_pci_dev(pdev) { 2019 for_each_pci_dev(pdev) {
1818 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO || 2020 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1819 !IS_GFX_DEVICE(pdev)) 2021 ret = domain_context_mapping(domain, pdev,
1820 continue; 2022 CONTEXT_TT_PASS_THROUGH);
1821 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1822 pci_name(pdev));
1823 ret = iommu_prepare_with_active_regions(pdev);
1824 if (ret) 2023 if (ret)
1825 printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); 2024 return ret;
1826 } 2025 }
2026 return 0;
1827} 2027}
1828#else /* !CONFIG_DMAR_GFX_WA */ 2028
1829static inline void iommu_prepare_gfx_mapping(void) 2029static int md_domain_init(struct dmar_domain *domain, int guest_width);
2030
2031static int __init si_domain_work_fn(unsigned long start_pfn,
2032 unsigned long end_pfn, void *datax)
1830{ 2033{
1831 return; 2034 int *ret = datax;
2035
2036 *ret = iommu_domain_identity_map(si_domain,
2037 (uint64_t)start_pfn << PAGE_SHIFT,
2038 (uint64_t)end_pfn << PAGE_SHIFT);
2039 return *ret;
2040
1832} 2041}
1833#endif
1834 2042
1835#ifdef CONFIG_DMAR_FLOPPY_WA 2043static int si_domain_init(void)
1836static inline void iommu_prepare_isa(void)
1837{ 2044{
1838 struct pci_dev *pdev; 2045 struct dmar_drhd_unit *drhd;
1839 int ret; 2046 struct intel_iommu *iommu;
2047 int nid, ret = 0;
1840 2048
1841 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); 2049 si_domain = alloc_domain();
1842 if (!pdev) 2050 if (!si_domain)
1843 return; 2051 return -EFAULT;
1844 2052
1845 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n"); 2053 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
1846 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1847 2054
1848 if (ret) 2055 for_each_active_iommu(iommu, drhd) {
1849 printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, " 2056 ret = iommu_attach_domain(si_domain, iommu);
1850 "floppy might not work\n"); 2057 if (ret) {
2058 domain_exit(si_domain);
2059 return -EFAULT;
2060 }
2061 }
2062
2063 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2064 domain_exit(si_domain);
2065 return -EFAULT;
2066 }
1851 2067
2068 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2069
2070 for_each_online_node(nid) {
2071 work_with_active_regions(nid, si_domain_work_fn, &ret);
2072 if (ret)
2073 return ret;
2074 }
2075
2076 return 0;
1852} 2077}
1853#else 2078
1854static inline void iommu_prepare_isa(void) 2079static void domain_remove_one_dev_info(struct dmar_domain *domain,
2080 struct pci_dev *pdev);
2081static int identity_mapping(struct pci_dev *pdev)
1855{ 2082{
1856 return; 2083 struct device_domain_info *info;
2084
2085 if (likely(!iommu_identity_mapping))
2086 return 0;
2087
2088
2089 list_for_each_entry(info, &si_domain->devices, link)
2090 if (info->dev == pdev)
2091 return 1;
2092 return 0;
2093}
2094
2095static int domain_add_dev_info(struct dmar_domain *domain,
2096 struct pci_dev *pdev)
2097{
2098 struct device_domain_info *info;
2099 unsigned long flags;
2100
2101 info = alloc_devinfo_mem();
2102 if (!info)
2103 return -ENOMEM;
2104
2105 info->segment = pci_domain_nr(pdev->bus);
2106 info->bus = pdev->bus->number;
2107 info->devfn = pdev->devfn;
2108 info->dev = pdev;
2109 info->domain = domain;
2110
2111 spin_lock_irqsave(&device_domain_lock, flags);
2112 list_add(&info->link, &domain->devices);
2113 list_add(&info->global, &device_domain_list);
2114 pdev->dev.archdata.iommu = info;
2115 spin_unlock_irqrestore(&device_domain_lock, flags);
2116
2117 return 0;
2118}
2119
2120static int iommu_prepare_static_identity_mapping(void)
2121{
2122 struct pci_dev *pdev = NULL;
2123 int ret;
2124
2125 ret = si_domain_init();
2126 if (ret)
2127 return -EFAULT;
2128
2129 for_each_pci_dev(pdev) {
2130 printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
2131 pci_name(pdev));
2132
2133 ret = domain_context_mapping(si_domain, pdev,
2134 CONTEXT_TT_MULTI_LEVEL);
2135 if (ret)
2136 return ret;
2137 ret = domain_add_dev_info(si_domain, pdev);
2138 if (ret)
2139 return ret;
2140 }
2141
2142 return 0;
1857} 2143}
1858#endif /* !CONFIG_DMAR_FLPY_WA */
1859 2144
1860static int __init init_dmars(void) 2145int __init init_dmars(void)
1861{ 2146{
1862 struct dmar_drhd_unit *drhd; 2147 struct dmar_drhd_unit *drhd;
1863 struct dmar_rmrr_unit *rmrr; 2148 struct dmar_rmrr_unit *rmrr;
1864 struct pci_dev *pdev; 2149 struct pci_dev *pdev;
1865 struct intel_iommu *iommu; 2150 struct intel_iommu *iommu;
1866 int i, ret; 2151 int i, ret;
2152 int pass_through = 1;
2153
2154 /*
2155 * If pass through cannot be enabled, the iommu falls back to identity
2156 * mapping.
2157 */
2158 if (iommu_pass_through)
2159 iommu_identity_mapping = 1;
1867 2160
1868 /* 2161 /*
1869 * for each drhd 2162 * for each drhd
@@ -1917,7 +2210,15 @@ static int __init init_dmars(void)
1917 printk(KERN_ERR "IOMMU: allocate root entry failed\n"); 2210 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1918 goto error; 2211 goto error;
1919 } 2212 }
2213 if (!ecap_pass_through(iommu->ecap))
2214 pass_through = 0;
1920 } 2215 }
2216 if (iommu_pass_through)
2217 if (!pass_through) {
2218 printk(KERN_INFO
2219 "Pass Through is not supported by hardware.\n");
2220 iommu_pass_through = 0;
2221 }
1921 2222
1922 /* 2223 /*
1923 * Start from the sane iommu hardware state. 2224 * Start from the sane iommu hardware state.
@@ -1973,35 +2274,58 @@ static int __init init_dmars(void)
1973 } 2274 }
1974 2275
1975 /* 2276 /*
1976 * For each rmrr 2277 * If pass through is set and enabled, context entries of all pci
1977 * for each dev attached to rmrr 2278 * devices are initialized with the pass through translation type.
1978 * do
1979 * locate drhd for dev, alloc domain for dev
1980 * allocate free domain
1981 * allocate page table entries for rmrr
1982 * if context not allocated for bus
1983 * allocate and init context
1984 * set present in root table for this bus
1985 * init context with domain, translation etc
1986 * endfor
1987 * endfor
1988 */ 2279 */
1989 for_each_rmrr_units(rmrr) { 2280 if (iommu_pass_through) {
1990 for (i = 0; i < rmrr->devices_cnt; i++) { 2281 ret = init_context_pass_through();
1991 pdev = rmrr->devices[i]; 2282 if (ret) {
1992 /* some BIOS lists non-exist devices in DMAR table */ 2283 printk(KERN_ERR "IOMMU: Pass through init failed.\n");
1993 if (!pdev) 2284 iommu_pass_through = 0;
1994 continue;
1995 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1996 if (ret)
1997 printk(KERN_ERR
1998 "IOMMU: mapping reserved region failed\n");
1999 } 2285 }
2000 } 2286 }
2001 2287
2002 iommu_prepare_gfx_mapping(); 2288 /*
2289 * If pass through is not set or could not be enabled, set up context
2290 * entries for identity mappings of rmrr, gfx and isa, and fall back to
2291 * static identity mapping if iommu_identity_mapping is set.
2292 */
2293 if (!iommu_pass_through) {
2294 if (iommu_identity_mapping)
2295 iommu_prepare_static_identity_mapping();
2296 /*
2297 * For each rmrr
2298 * for each dev attached to rmrr
2299 * do
2300 * locate drhd for dev, alloc domain for dev
2301 * allocate free domain
2302 * allocate page table entries for rmrr
2303 * if context not allocated for bus
2304 * allocate and init context
2305 * set present in root table for this bus
2306 * init context with domain, translation etc
2307 * endfor
2308 * endfor
2309 */
2310 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2311 for_each_rmrr_units(rmrr) {
2312 for (i = 0; i < rmrr->devices_cnt; i++) {
2313 pdev = rmrr->devices[i];
2314 /*
2315 * some BIOSes list non-existent devices in
2316 * the DMAR table.
2317 */
2318 if (!pdev)
2319 continue;
2320 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2321 if (ret)
2322 printk(KERN_ERR
2323 "IOMMU: mapping reserved region failed\n");
2324 }
2325 }
2003 2326
2004 iommu_prepare_isa(); 2327 iommu_prepare_isa();
2328 }
2005 2329
2006 /* 2330 /*
2007 * for each drhd 2331 * for each drhd
@@ -2023,10 +2347,8 @@ static int __init init_dmars(void)
2023 2347
2024 iommu_set_root_entry(iommu); 2348 iommu_set_root_entry(iommu);
2025 2349
2026 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, 2350 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2027 0); 2351 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2028 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2029 0);
2030 iommu_disable_protect_mem_regions(iommu); 2352 iommu_disable_protect_mem_regions(iommu);
2031 2353
2032 ret = iommu_enable_translation(iommu); 2354 ret = iommu_enable_translation(iommu);
@@ -2046,50 +2368,40 @@ error:
2046 return ret; 2368 return ret;
2047} 2369}
2048 2370
2049static inline u64 aligned_size(u64 host_addr, size_t size) 2371static inline unsigned long aligned_nrpages(unsigned long host_addr,
2050{ 2372 size_t size)
2051 u64 addr;
2052 addr = (host_addr & (~PAGE_MASK)) + size;
2053 return PAGE_ALIGN(addr);
2054}
2055
2056struct iova *
2057iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2058{ 2373{
2059 struct iova *piova; 2374 host_addr &= ~PAGE_MASK;
2060 2375 host_addr += size + PAGE_SIZE - 1;
2061 /* Make sure it's in range */
2062 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2063 if (!size || (IOVA_START_ADDR + size > end))
2064 return NULL;
2065 2376
2066 piova = alloc_iova(&domain->iovad, 2377 return host_addr >> VTD_PAGE_SHIFT;
2067 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2068 return piova;
2069} 2378}
2070 2379
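As a quick check of the arithmetic above, here is a standalone sketch, assuming 4KiB pages (PAGE_SHIFT == VTD_PAGE_SHIFT == 12, as on x86); the macros and function below are local stand-ins for illustration, not the kernel definitions.

#include <assert.h>
#include <stddef.h>

#define PAGE_SHIFT     12
#define PAGE_SIZE      (1UL << PAGE_SHIFT)
#define PAGE_MASK      (~(PAGE_SIZE - 1))
#define VTD_PAGE_SHIFT 12

static unsigned long aligned_nrpages(unsigned long host_addr, size_t size)
{
	host_addr &= ~PAGE_MASK;             /* keep only the in-page offset */
	host_addr += size + PAGE_SIZE - 1;   /* round the end up to a page   */
	return host_addr >> VTD_PAGE_SHIFT;  /* VT-d pages covered           */
}

int main(void)
{
	/* a page-aligned 4KiB buffer covers exactly one page... */
	assert(aligned_nrpages(0x10000, 0x1000) == 1);
	/* ...but the same 4KiB starting at offset 0x234 straddles two pages */
	assert(aligned_nrpages(0x10234, 0x1000) == 2);
	return 0;
}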
2071static struct iova * 2380static struct iova *intel_alloc_iova(struct device *dev,
2072__intel_alloc_iova(struct device *dev, struct dmar_domain *domain, 2381 struct dmar_domain *domain,
2073 size_t size, u64 dma_mask) 2382 unsigned long nrpages, uint64_t dma_mask)
2074{ 2383{
2075 struct pci_dev *pdev = to_pci_dev(dev); 2384 struct pci_dev *pdev = to_pci_dev(dev);
2076 struct iova *iova = NULL; 2385 struct iova *iova = NULL;
2077 2386
2078 if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac) 2387 /* Restrict dma_mask to the width that the iommu can handle */
2079 iova = iommu_alloc_iova(domain, size, dma_mask); 2388 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2080 else { 2389
2390 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2081 /* 2391 /*
2082 * First try to allocate an io virtual address in 2392 * First try to allocate an io virtual address in
2083 * DMA_BIT_MASK(32) and if that fails then try allocating 2393 * DMA_BIT_MASK(32) and if that fails then try allocating
2084 * from higher range 2394 * from higher range
2085 */ 2395 */
2086 iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32)); 2396 iova = alloc_iova(&domain->iovad, nrpages,
2087 if (!iova) 2397 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2088 iova = iommu_alloc_iova(domain, size, dma_mask); 2398 if (iova)
2089 } 2399 return iova;
2090 2400 }
2091 if (!iova) { 2401 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2092 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); 2402 if (unlikely(!iova)) {
2403 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2404 nrpages, pci_name(pdev));
2093 return NULL; 2405 return NULL;
2094 } 2406 }
2095 2407
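The ordering of the two attempts matters more than the allocator itself. The toy sketch below restates it with a hypothetical try_below() standing in for alloc_iova() and made-up masks; it only demonstrates clamping the DMA mask to the domain width and the low-range-first fallback, not real IOVA management.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

/* toy stand-in for alloc_iova(): pretend the low 4GiB is already full */
static bool try_below(uint64_t limit_addr, unsigned long nrpages, uint64_t *out)
{
	(void)nrpages;
	if (limit_addr <= DMA_BIT_MASK(32))
		return false;
	*out = DMA_BIT_MASK(32) + 1;	/* first address above 4GiB */
	return true;
}

static bool alloc_prefer_dma32(bool forcedac, uint64_t dma_mask,
			       uint64_t domain_max, unsigned long nrpages,
			       uint64_t *out)
{
	/* restrict the mask to what the IOMMU domain can address */
	if (dma_mask > domain_max)
		dma_mask = domain_max;

	/* first try the 32-bit range, unless forcedac asks us not to */
	if (!forcedac && dma_mask > DMA_BIT_MASK(32) &&
	    try_below(DMA_BIT_MASK(32), nrpages, out))
		return true;

	/* low range exhausted (or not tried): use the full mask */
	return try_below(dma_mask, nrpages, out);
}

int main(void)
{
	uint64_t iova;

	if (alloc_prefer_dma32(false, DMA_BIT_MASK(64), DMA_BIT_MASK(48),
			       4, &iova))
		printf("toy iova at %#llx\n", (unsigned long long)iova);
	return 0;
}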
@@ -2112,7 +2424,8 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
2112 2424
2113 /* make sure context mapping is ok */ 2425 /* make sure context mapping is ok */
2114 if (unlikely(!domain_context_mapped(pdev))) { 2426 if (unlikely(!domain_context_mapped(pdev))) {
2115 ret = domain_context_mapping(domain, pdev); 2427 ret = domain_context_mapping(domain, pdev,
2428 CONTEXT_TT_MULTI_LEVEL);
2116 if (ret) { 2429 if (ret) {
2117 printk(KERN_ERR 2430 printk(KERN_ERR
2118 "Domain context map for %s failed", 2431 "Domain context map for %s failed",
@@ -2124,6 +2437,52 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
2124 return domain; 2437 return domain;
2125} 2438}
2126 2439
2440static int iommu_dummy(struct pci_dev *pdev)
2441{
2442 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2443}
2444
2445/* Check if the pdev needs to go through non-identity map and unmap process. */
2446static int iommu_no_mapping(struct pci_dev *pdev)
2447{
2448 int found;
2449
2450 if (!iommu_identity_mapping)
2451 return iommu_dummy(pdev);
2452
2453 found = identity_mapping(pdev);
2454 if (found) {
2455 if (pdev->dma_mask > DMA_BIT_MASK(32))
2456 return 1;
2457 else {
2458 /*
2459 * A 32 bit DMA device is removed from si_domain and falls
2460 * back to non-identity mapping.
2461 */
2462 domain_remove_one_dev_info(si_domain, pdev);
2463 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2464 pci_name(pdev));
2465 return 0;
2466 }
2467 } else {
2468 /*
2469 * When a 64 bit DMA device is detached from a vm, the device
2470 * is put back into si_domain for identity mapping.
2471 */
2472 if (pdev->dma_mask > DMA_BIT_MASK(32)) {
2473 int ret;
2474 ret = domain_add_dev_info(si_domain, pdev);
2475 if (!ret) {
2476 printk(KERN_INFO "64bit %s uses identity mapping\n",
2477 pci_name(pdev));
2478 return 1;
2479 }
2480 }
2481 }
2482
2483 return iommu_dummy(pdev);
2484}
2485
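Side effects aside (migrating the device in or out of si_domain, and the DUMMY_DEVICE_DOMAIN_INFO escape), the branches above reduce to a single predicate on the device's DMA mask. A minimal restatement for reference only; wants_identity_mapping() is not a kernel function, and error paths are ignored.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

static bool wants_identity_mapping(uint64_t dma_mask)
{
	/* 32-bit-only devices cannot address an arbitrary 1:1 map of all
	 * physical memory, so they get per-device DMA remapping instead */
	return dma_mask > DMA_BIT_MASK(32);
}

int main(void)
{
	assert(!wants_identity_mapping(DMA_BIT_MASK(32)));	/* demoted   */
	assert(wants_identity_mapping(DMA_BIT_MASK(64)));	/* stays 1:1 */
	return 0;
}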
2127static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, 2486static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2128 size_t size, int dir, u64 dma_mask) 2487 size_t size, int dir, u64 dma_mask)
2129{ 2488{
@@ -2136,7 +2495,8 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2136 struct intel_iommu *iommu; 2495 struct intel_iommu *iommu;
2137 2496
2138 BUG_ON(dir == DMA_NONE); 2497 BUG_ON(dir == DMA_NONE);
2139 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2498
2499 if (iommu_no_mapping(pdev))
2140 return paddr; 2500 return paddr;
2141 2501
2142 domain = get_valid_domain_for_dev(pdev); 2502 domain = get_valid_domain_for_dev(pdev);
@@ -2144,14 +2504,12 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2144 return 0; 2504 return 0;
2145 2505
2146 iommu = domain_get_iommu(domain); 2506 iommu = domain_get_iommu(domain);
2147 size = aligned_size((u64)paddr, size); 2507 size = aligned_nrpages(paddr, size);
2148 2508
2149 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2509 iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2150 if (!iova) 2510 if (!iova)
2151 goto error; 2511 goto error;
2152 2512
2153 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2154
2155 /* 2513 /*
2156 * Check if DMAR supports zero-length reads on write only 2514 * Check if DMAR supports zero-length reads on write only
2157 * mappings.. 2515 * mappings..
@@ -2167,19 +2525,20 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2167 * might have two guest_addr mapping to the same host paddr, but this 2525 * might have two guest_addr mapping to the same host paddr, but this
2168 * is not a big problem 2526 * is not a big problem
2169 */ 2527 */
2170 ret = domain_page_mapping(domain, start_paddr, 2528 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2171 ((u64)paddr) & PHYSICAL_PAGE_MASK, 2529 paddr >> VTD_PAGE_SHIFT, size, prot);
2172 size, prot);
2173 if (ret) 2530 if (ret)
2174 goto error; 2531 goto error;
2175 2532
2176 /* it's a non-present to present mapping */ 2533 /* it's a non-present to present mapping. Only flush if caching mode is set */
2177 ret = iommu_flush_iotlb_psi(iommu, domain->id, 2534 if (cap_caching_mode(iommu->cap))
2178 start_paddr, size >> VTD_PAGE_SHIFT, 1); 2535 iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size);
2179 if (ret) 2536 else
2180 iommu_flush_write_buffer(iommu); 2537 iommu_flush_write_buffer(iommu);
2181 2538
2182 return start_paddr + ((u64)paddr & (~PAGE_MASK)); 2539 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2540 start_paddr += paddr & ~PAGE_MASK;
2541 return start_paddr;
2183 2542
2184error: 2543error:
2185 if (iova) 2544 if (iova)
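The bus address returned above is simply the page-aligned base of the allocated IOVA plus the caller's sub-page offset. A small worked example, assuming 4KiB pages; the macros and values are local and illustrative, not the kernel's.

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
	uint64_t paddr  = 0x12345678;	/* CPU physical address being mapped */
	uint64_t pfn_lo = 0xff000;	/* first page of the allocated IOVA  */

	uint64_t dma_addr = (pfn_lo << PAGE_SHIFT) + (paddr & ~PAGE_MASK);

	/* the device sees the same offset within the page as the CPU */
	assert((dma_addr & ~PAGE_MASK) == (paddr & ~PAGE_MASK));
	assert(dma_addr == 0xff000678ULL);
	return 0;
}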
@@ -2210,15 +2569,22 @@ static void flush_unmaps(void)
2210 if (!iommu) 2569 if (!iommu)
2211 continue; 2570 continue;
2212 2571
2213 if (deferred_flush[i].next) { 2572 if (!deferred_flush[i].next)
2214 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2573 continue;
2215 DMA_TLB_GLOBAL_FLUSH, 0); 2574
2216 for (j = 0; j < deferred_flush[i].next; j++) { 2575 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2217 __free_iova(&deferred_flush[i].domain[j]->iovad, 2576 DMA_TLB_GLOBAL_FLUSH);
2218 deferred_flush[i].iova[j]); 2577 for (j = 0; j < deferred_flush[i].next; j++) {
2219 } 2578 unsigned long mask;
2220 deferred_flush[i].next = 0; 2579 struct iova *iova = deferred_flush[i].iova[j];
2580
2581 mask = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT;
2582 mask = ilog2(mask >> VTD_PAGE_SHIFT);
2583 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2584 iova->pfn_lo << PAGE_SHIFT, mask);
2585 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2221 } 2586 }
2587 deferred_flush[i].next = 0;
2222 } 2588 }
2223 2589
2224 list_size = 0; 2590 list_size = 0;
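The device-IOTLB invalidation above takes an order (log2 of the number of VT-d pages) rather than a byte count. A worked example, assuming 4KiB pages and a power-of-two range, which the size-aligned iova allocation is expected to provide; ilog2_ul() is a local stand-in for the kernel's ilog2().

#include <assert.h>

#define PAGE_SHIFT     12
#define VTD_PAGE_SHIFT 12

static unsigned long ilog2_ul(unsigned long v)
{
	unsigned long r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	/* an IOVA covering mm pfns 0x100..0x107, i.e. 8 pages */
	unsigned long pfn_lo = 0x100, pfn_hi = 0x107;
	unsigned long mask;

	mask = (pfn_hi - pfn_lo + 1) << PAGE_SHIFT;	/* 8 * 4KiB bytes */
	mask = ilog2_ul(mask >> VTD_PAGE_SHIFT);	/* log2(8 pages)  */
	assert(mask == 3);	/* invalidate a 2^3-page aligned block */
	return 0;
}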
@@ -2265,35 +2631,38 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2265{ 2631{
2266 struct pci_dev *pdev = to_pci_dev(dev); 2632 struct pci_dev *pdev = to_pci_dev(dev);
2267 struct dmar_domain *domain; 2633 struct dmar_domain *domain;
2268 unsigned long start_addr; 2634 unsigned long start_pfn, last_pfn;
2269 struct iova *iova; 2635 struct iova *iova;
2270 struct intel_iommu *iommu; 2636 struct intel_iommu *iommu;
2271 2637
2272 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2638 if (iommu_no_mapping(pdev))
2273 return; 2639 return;
2640
2274 domain = find_domain(pdev); 2641 domain = find_domain(pdev);
2275 BUG_ON(!domain); 2642 BUG_ON(!domain);
2276 2643
2277 iommu = domain_get_iommu(domain); 2644 iommu = domain_get_iommu(domain);
2278 2645
2279 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); 2646 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2280 if (!iova) 2647 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2648 (unsigned long long)dev_addr))
2281 return; 2649 return;
2282 2650
2283 start_addr = iova->pfn_lo << PAGE_SHIFT; 2651 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2284 size = aligned_size((u64)dev_addr, size); 2652 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2285 2653
2286 pr_debug("Device %s unmapping: %zx@%llx\n", 2654 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2287 pci_name(pdev), size, (unsigned long long)start_addr); 2655 pci_name(pdev), start_pfn, last_pfn);
2288 2656
2289 /* clear the whole page */ 2657 /* clear the whole page */
2290 dma_pte_clear_range(domain, start_addr, start_addr + size); 2658 dma_pte_clear_range(domain, start_pfn, last_pfn);
2659
2291 /* free page tables */ 2660 /* free page tables */
2292 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2661 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2662
2293 if (intel_iommu_strict) { 2663 if (intel_iommu_strict) {
2294 if (iommu_flush_iotlb_psi(iommu, 2664 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2295 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) 2665 last_pfn - start_pfn + 1);
2296 iommu_flush_write_buffer(iommu);
2297 /* free iova */ 2666 /* free iova */
2298 __free_iova(&domain->iovad, iova); 2667 __free_iova(&domain->iovad, iova);
2299 } else { 2668 } else {
@@ -2351,17 +2720,13 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2351 int nelems, enum dma_data_direction dir, 2720 int nelems, enum dma_data_direction dir,
2352 struct dma_attrs *attrs) 2721 struct dma_attrs *attrs)
2353{ 2722{
2354 int i;
2355 struct pci_dev *pdev = to_pci_dev(hwdev); 2723 struct pci_dev *pdev = to_pci_dev(hwdev);
2356 struct dmar_domain *domain; 2724 struct dmar_domain *domain;
2357 unsigned long start_addr; 2725 unsigned long start_pfn, last_pfn;
2358 struct iova *iova; 2726 struct iova *iova;
2359 size_t size = 0;
2360 phys_addr_t addr;
2361 struct scatterlist *sg;
2362 struct intel_iommu *iommu; 2727 struct intel_iommu *iommu;
2363 2728
2364 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2729 if (iommu_no_mapping(pdev))
2365 return; 2730 return;
2366 2731
2367 domain = find_domain(pdev); 2732 domain = find_domain(pdev);
@@ -2370,23 +2735,21 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2370 iommu = domain_get_iommu(domain); 2735 iommu = domain_get_iommu(domain);
2371 2736
2372 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); 2737 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2373 if (!iova) 2738 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2739 (unsigned long long)sglist[0].dma_address))
2374 return; 2740 return;
2375 for_each_sg(sglist, sg, nelems, i) {
2376 addr = page_to_phys(sg_page(sg)) + sg->offset;
2377 size += aligned_size((u64)addr, sg->length);
2378 }
2379 2741
2380 start_addr = iova->pfn_lo << PAGE_SHIFT; 2742 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2743 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2381 2744
2382 /* clear the whole page */ 2745 /* clear the whole page */
2383 dma_pte_clear_range(domain, start_addr, start_addr + size); 2746 dma_pte_clear_range(domain, start_pfn, last_pfn);
2747
2384 /* free page tables */ 2748 /* free page tables */
2385 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2749 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2386 2750
2387 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2751 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2388 size >> VTD_PAGE_SHIFT, 0)) 2752 (last_pfn - start_pfn + 1));
2389 iommu_flush_write_buffer(iommu);
2390 2753
2391 /* free iova */ 2754 /* free iova */
2392 __free_iova(&domain->iovad, iova); 2755 __free_iova(&domain->iovad, iova);
@@ -2409,21 +2772,20 @@ static int intel_nontranslate_map_sg(struct device *hddev,
2409static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, 2772static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2410 enum dma_data_direction dir, struct dma_attrs *attrs) 2773 enum dma_data_direction dir, struct dma_attrs *attrs)
2411{ 2774{
2412 phys_addr_t addr;
2413 int i; 2775 int i;
2414 struct pci_dev *pdev = to_pci_dev(hwdev); 2776 struct pci_dev *pdev = to_pci_dev(hwdev);
2415 struct dmar_domain *domain; 2777 struct dmar_domain *domain;
2416 size_t size = 0; 2778 size_t size = 0;
2417 int prot = 0; 2779 int prot = 0;
2418 size_t offset = 0; 2780 size_t offset_pfn = 0;
2419 struct iova *iova = NULL; 2781 struct iova *iova = NULL;
2420 int ret; 2782 int ret;
2421 struct scatterlist *sg; 2783 struct scatterlist *sg;
2422 unsigned long start_addr; 2784 unsigned long start_vpfn;
2423 struct intel_iommu *iommu; 2785 struct intel_iommu *iommu;
2424 2786
2425 BUG_ON(dir == DMA_NONE); 2787 BUG_ON(dir == DMA_NONE);
2426 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2788 if (iommu_no_mapping(pdev))
2427 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); 2789 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2428 2790
2429 domain = get_valid_domain_for_dev(pdev); 2791 domain = get_valid_domain_for_dev(pdev);
@@ -2432,12 +2794,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2432 2794
2433 iommu = domain_get_iommu(domain); 2795 iommu = domain_get_iommu(domain);
2434 2796
2435 for_each_sg(sglist, sg, nelems, i) { 2797 for_each_sg(sglist, sg, nelems, i)
2436 addr = page_to_phys(sg_page(sg)) + sg->offset; 2798 size += aligned_nrpages(sg->offset, sg->length);
2437 size += aligned_size((u64)addr, sg->length);
2438 }
2439 2799
2440 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2800 iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2441 if (!iova) { 2801 if (!iova) {
2442 sglist->dma_length = 0; 2802 sglist->dma_length = 0;
2443 return 0; 2803 return 0;
@@ -2453,35 +2813,27 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2453 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2813 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2454 prot |= DMA_PTE_WRITE; 2814 prot |= DMA_PTE_WRITE;
2455 2815
2456 start_addr = iova->pfn_lo << PAGE_SHIFT; 2816 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
2457 offset = 0; 2817
2458 for_each_sg(sglist, sg, nelems, i) { 2818 ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot);
2459 addr = page_to_phys(sg_page(sg)) + sg->offset; 2819 if (unlikely(ret)) {
2460 size = aligned_size((u64)addr, sg->length); 2820 /* clear the page */
2461 ret = domain_page_mapping(domain, start_addr + offset, 2821 dma_pte_clear_range(domain, start_vpfn,
2462 ((u64)addr) & PHYSICAL_PAGE_MASK, 2822 start_vpfn + size - 1);
2463 size, prot); 2823 /* free page tables */
2464 if (ret) { 2824 dma_pte_free_pagetable(domain, start_vpfn,
2465 /* clear the page */ 2825 start_vpfn + size - 1);
2466 dma_pte_clear_range(domain, start_addr, 2826 /* free iova */
2467 start_addr + offset); 2827 __free_iova(&domain->iovad, iova);
2468 /* free page tables */ 2828 return 0;
2469 dma_pte_free_pagetable(domain, start_addr,
2470 start_addr + offset);
2471 /* free iova */
2472 __free_iova(&domain->iovad, iova);
2473 return 0;
2474 }
2475 sg->dma_address = start_addr + offset +
2476 ((u64)addr & (~PAGE_MASK));
2477 sg->dma_length = sg->length;
2478 offset += size;
2479 } 2829 }
2480 2830
2481 /* it's a non-present to present mapping */ 2831 /* it's a non-present to present mapping. Only flush if caching mode is set */
2482 if (iommu_flush_iotlb_psi(iommu, domain->id, 2832 if (cap_caching_mode(iommu->cap))
2483 start_addr, offset >> VTD_PAGE_SHIFT, 1)) 2833 iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn);
2834 else
2484 iommu_flush_write_buffer(iommu); 2835 iommu_flush_write_buffer(iommu);
2836
2485 return nelems; 2837 return nelems;
2486} 2838}
2487 2839
@@ -2640,9 +2992,9 @@ static int init_iommu_hw(void)
2640 iommu_set_root_entry(iommu); 2992 iommu_set_root_entry(iommu);
2641 2993
2642 iommu->flush.flush_context(iommu, 0, 0, 0, 2994 iommu->flush.flush_context(iommu, 0, 0, 0,
2643 DMA_CCMD_GLOBAL_INVL, 0); 2995 DMA_CCMD_GLOBAL_INVL);
2644 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2996 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2645 DMA_TLB_GLOBAL_FLUSH, 0); 2997 DMA_TLB_GLOBAL_FLUSH);
2646 iommu_disable_protect_mem_regions(iommu); 2998 iommu_disable_protect_mem_regions(iommu);
2647 iommu_enable_translation(iommu); 2999 iommu_enable_translation(iommu);
2648 } 3000 }
@@ -2657,9 +3009,9 @@ static void iommu_flush_all(void)
2657 3009
2658 for_each_active_iommu(iommu, drhd) { 3010 for_each_active_iommu(iommu, drhd) {
2659 iommu->flush.flush_context(iommu, 0, 0, 0, 3011 iommu->flush.flush_context(iommu, 0, 0, 0,
2660 DMA_CCMD_GLOBAL_INVL, 0); 3012 DMA_CCMD_GLOBAL_INVL);
2661 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 3013 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2662 DMA_TLB_GLOBAL_FLUSH, 0); 3014 DMA_TLB_GLOBAL_FLUSH);
2663 } 3015 }
2664} 3016}
2665 3017
@@ -2782,7 +3134,7 @@ int __init intel_iommu_init(void)
2782 * Check the need for DMA-remapping initialization now. 3134 * Check the need for DMA-remapping initialization now.
2783 * Above initialization will also be used by Interrupt-remapping. 3135 * Above initialization will also be used by Interrupt-remapping.
2784 */ 3136 */
2785 if (no_iommu || swiotlb || dmar_disabled) 3137 if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled)
2786 return -ENODEV; 3138 return -ENODEV;
2787 3139
2788 iommu_init_mempool(); 3140 iommu_init_mempool();
@@ -2802,35 +3154,18 @@ int __init intel_iommu_init(void)
2802 3154
2803 init_timer(&unmap_timer); 3155 init_timer(&unmap_timer);
2804 force_iommu = 1; 3156 force_iommu = 1;
2805 dma_ops = &intel_dma_ops;
2806 init_iommu_sysfs();
2807
2808 register_iommu(&intel_iommu_ops);
2809 3157
2810 return 0; 3158 if (!iommu_pass_through) {
2811} 3159 printk(KERN_INFO
3160 "Multi-level page-table translation for DMAR.\n");
3161 dma_ops = &intel_dma_ops;
3162 } else
3163 printk(KERN_INFO
3164 "DMAR: Pass through translation for DMAR.\n");
2812 3165
2813static int vm_domain_add_dev_info(struct dmar_domain *domain, 3166 init_iommu_sysfs();
2814 struct pci_dev *pdev)
2815{
2816 struct device_domain_info *info;
2817 unsigned long flags;
2818
2819 info = alloc_devinfo_mem();
2820 if (!info)
2821 return -ENOMEM;
2822
2823 info->segment = pci_domain_nr(pdev->bus);
2824 info->bus = pdev->bus->number;
2825 info->devfn = pdev->devfn;
2826 info->dev = pdev;
2827 info->domain = domain;
2828 3167
2829 spin_lock_irqsave(&device_domain_lock, flags); 3168 register_iommu(&intel_iommu_ops);
2830 list_add(&info->link, &domain->devices);
2831 list_add(&info->global, &device_domain_list);
2832 pdev->dev.archdata.iommu = info;
2833 spin_unlock_irqrestore(&device_domain_lock, flags);
2834 3169
2835 return 0; 3170 return 0;
2836} 3171}
@@ -2862,7 +3197,7 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
2862 } 3197 }
2863} 3198}
2864 3199
2865static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, 3200static void domain_remove_one_dev_info(struct dmar_domain *domain,
2866 struct pci_dev *pdev) 3201 struct pci_dev *pdev)
2867{ 3202{
2868 struct device_domain_info *info; 3203 struct device_domain_info *info;
@@ -2888,6 +3223,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2888 info->dev->dev.archdata.iommu = NULL; 3223 info->dev->dev.archdata.iommu = NULL;
2889 spin_unlock_irqrestore(&device_domain_lock, flags); 3224 spin_unlock_irqrestore(&device_domain_lock, flags);
2890 3225
3226 iommu_disable_dev_iotlb(info);
2891 iommu_detach_dev(iommu, info->bus, info->devfn); 3227 iommu_detach_dev(iommu, info->bus, info->devfn);
2892 iommu_detach_dependent_devices(iommu, pdev); 3228 iommu_detach_dependent_devices(iommu, pdev);
2893 free_devinfo_mem(info); 3229 free_devinfo_mem(info);
@@ -2938,6 +3274,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2938 3274
2939 spin_unlock_irqrestore(&device_domain_lock, flags1); 3275 spin_unlock_irqrestore(&device_domain_lock, flags1);
2940 3276
3277 iommu_disable_dev_iotlb(info);
2941 iommu = device_to_iommu(info->segment, info->bus, info->devfn); 3278 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
2942 iommu_detach_dev(iommu, info->bus, info->devfn); 3279 iommu_detach_dev(iommu, info->bus, info->devfn);
2943 iommu_detach_dependent_devices(iommu, info->dev); 3280 iommu_detach_dependent_devices(iommu, info->dev);
@@ -2993,12 +3330,11 @@ static struct dmar_domain *iommu_alloc_vm_domain(void)
2993 return domain; 3330 return domain;
2994} 3331}
2995 3332
2996static int vm_domain_init(struct dmar_domain *domain, int guest_width) 3333static int md_domain_init(struct dmar_domain *domain, int guest_width)
2997{ 3334{
2998 int adjust_width; 3335 int adjust_width;
2999 3336
3000 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 3337 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3001 spin_lock_init(&domain->mapping_lock);
3002 spin_lock_init(&domain->iommu_lock); 3338 spin_lock_init(&domain->iommu_lock);
3003 3339
3004 domain_reserve_special_ranges(domain); 3340 domain_reserve_special_ranges(domain);
@@ -3052,8 +3388,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain)
3052 3388
3053static void vm_domain_exit(struct dmar_domain *domain) 3389static void vm_domain_exit(struct dmar_domain *domain)
3054{ 3390{
3055 u64 end;
3056
3057 /* Domain 0 is reserved, so dont process it */ 3391 /* Domain 0 is reserved, so dont process it */
3058 if (!domain) 3392 if (!domain)
3059 return; 3393 return;
@@ -3061,14 +3395,12 @@ static void vm_domain_exit(struct dmar_domain *domain)
3061 vm_domain_remove_all_dev_info(domain); 3395 vm_domain_remove_all_dev_info(domain);
3062 /* destroy iovas */ 3396 /* destroy iovas */
3063 put_iova_domain(&domain->iovad); 3397 put_iova_domain(&domain->iovad);
3064 end = DOMAIN_MAX_ADDR(domain->gaw);
3065 end = end & (~VTD_PAGE_MASK);
3066 3398
3067 /* clear ptes */ 3399 /* clear ptes */
3068 dma_pte_clear_range(domain, 0, end); 3400 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3069 3401
3070 /* free page tables */ 3402 /* free page tables */
3071 dma_pte_free_pagetable(domain, 0, end); 3403 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3072 3404
3073 iommu_free_vm_domain(domain); 3405 iommu_free_vm_domain(domain);
3074 free_domain_mem(domain); 3406 free_domain_mem(domain);
@@ -3084,7 +3416,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain)
3084 "intel_iommu_domain_init: dmar_domain == NULL\n"); 3416 "intel_iommu_domain_init: dmar_domain == NULL\n");
3085 return -ENOMEM; 3417 return -ENOMEM;
3086 } 3418 }
3087 if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 3419 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3088 printk(KERN_ERR 3420 printk(KERN_ERR
3089 "intel_iommu_domain_init() failed\n"); 3421 "intel_iommu_domain_init() failed\n");
3090 vm_domain_exit(dmar_domain); 3422 vm_domain_exit(dmar_domain);
@@ -3119,8 +3451,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
3119 3451
3120 old_domain = find_domain(pdev); 3452 old_domain = find_domain(pdev);
3121 if (old_domain) { 3453 if (old_domain) {
3122 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) 3454 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3123 vm_domain_remove_one_dev_info(old_domain, pdev); 3455 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3456 domain_remove_one_dev_info(old_domain, pdev);
3124 else 3457 else
3125 domain_remove_dev_info(old_domain); 3458 domain_remove_dev_info(old_domain);
3126 } 3459 }
@@ -3142,11 +3475,11 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
3142 return -EFAULT; 3475 return -EFAULT;
3143 } 3476 }
3144 3477
3145 ret = domain_context_mapping(dmar_domain, pdev); 3478 ret = domain_add_dev_info(dmar_domain, pdev);
3146 if (ret) 3479 if (ret)
3147 return ret; 3480 return ret;
3148 3481
3149 ret = vm_domain_add_dev_info(dmar_domain, pdev); 3482 ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3150 return ret; 3483 return ret;
3151} 3484}
3152 3485
@@ -3156,7 +3489,7 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
3156 struct dmar_domain *dmar_domain = domain->priv; 3489 struct dmar_domain *dmar_domain = domain->priv;
3157 struct pci_dev *pdev = to_pci_dev(dev); 3490 struct pci_dev *pdev = to_pci_dev(dev);
3158 3491
3159 vm_domain_remove_one_dev_info(dmar_domain, pdev); 3492 domain_remove_one_dev_info(dmar_domain, pdev);
3160} 3493}
3161 3494
3162static int intel_iommu_map_range(struct iommu_domain *domain, 3495static int intel_iommu_map_range(struct iommu_domain *domain,
@@ -3176,7 +3509,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3176 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) 3509 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3177 prot |= DMA_PTE_SNP; 3510 prot |= DMA_PTE_SNP;
3178 3511
3179 max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); 3512 max_addr = iova + size;
3180 if (dmar_domain->max_addr < max_addr) { 3513 if (dmar_domain->max_addr < max_addr) {
3181 int min_agaw; 3514 int min_agaw;
3182 u64 end; 3515 u64 end;
@@ -3194,8 +3527,11 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3194 } 3527 }
3195 dmar_domain->max_addr = max_addr; 3528 dmar_domain->max_addr = max_addr;
3196 } 3529 }
3197 3530 /* Round up size to next multiple of PAGE_SIZE, if it and
3198 ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); 3531 the low bits of hpa would take us onto the next page */
3532 size = aligned_nrpages(hpa, size);
3533 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3534 hpa >> VTD_PAGE_SHIFT, size, prot);
3199 return ret; 3535 return ret;
3200} 3536}
3201 3537
@@ -3203,15 +3539,12 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain,
3203 unsigned long iova, size_t size) 3539 unsigned long iova, size_t size)
3204{ 3540{
3205 struct dmar_domain *dmar_domain = domain->priv; 3541 struct dmar_domain *dmar_domain = domain->priv;
3206 dma_addr_t base;
3207 3542
3208 /* The address might not be aligned */ 3543 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3209 base = iova & VTD_PAGE_MASK; 3544 (iova + size - 1) >> VTD_PAGE_SHIFT);
3210 size = VTD_PAGE_ALIGN(size);
3211 dma_pte_clear_range(dmar_domain, base, base + size);
3212 3545
3213 if (dmar_domain->max_addr == base + size) 3546 if (dmar_domain->max_addr == iova + size)
3214 dmar_domain->max_addr = base; 3547 dmar_domain->max_addr = iova;
3215} 3548}
3216 3549
3217static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 3550static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -3221,7 +3554,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3221 struct dma_pte *pte; 3554 struct dma_pte *pte;
3222 u64 phys = 0; 3555 u64 phys = 0;
3223 3556
3224 pte = addr_to_dma_pte(dmar_domain, iova); 3557 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
3225 if (pte) 3558 if (pte)
3226 phys = dma_pte_addr(pte); 3559 phys = dma_pte_addr(pte);
3227 3560