Diffstat (limited to 'drivers/pci/intel-iommu.c')
 drivers/pci/intel-iommu.c | 1375 +++++++++++++++++++++++++++++++---------------
 1 file changed, 854 insertions(+), 521 deletions(-)
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index cd389162735f..53075424a434 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -53,15 +53,35 @@
 
 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
 
+#define MAX_AGAW_WIDTH 64
+
 #define DOMAIN_MAX_ADDR(gaw)	((((u64)1) << gaw) - 1)
+#define DOMAIN_MAX_PFN(gaw)	((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
 
 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
 #define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
 #define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))
 
-#ifndef PHYSICAL_PAGE_MASK
-#define PHYSICAL_PAGE_MASK PAGE_MASK
-#endif
+
+/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
+   are never going to work. */
+static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
+{
+	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
+}
+
+static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
+{
+	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
+}
+static inline unsigned long page_to_dma_pfn(struct page *pg)
+{
+	return mm_to_dma_pfn(page_to_pfn(pg));
+}
+static inline unsigned long virt_to_dma_pfn(void *p)
+{
+	return page_to_dma_pfn(virt_to_page(p));
+}
 
 /* global iommu list, set NULL for ignored DMAR units */
 static struct intel_iommu **g_iommus;
@@ -131,8 +151,6 @@ static inline void context_set_fault_enable(struct context_entry *context)
 	context->lo &= (((u64)-1) << 2) | 1;
 }
 
-#define CONTEXT_TT_MULTI_LEVEL 0
-
 static inline void context_set_translation_type(struct context_entry *context,
 						unsigned long value)
 {
@@ -204,12 +222,17 @@ static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
 
 static inline u64 dma_pte_addr(struct dma_pte *pte)
 {
-	return (pte->val & VTD_PAGE_MASK);
+#ifdef CONFIG_64BIT
+	return pte->val & VTD_PAGE_MASK;
+#else
+	/* Must have a full atomic 64-bit read */
+	return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
+#endif
 }
 
-static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
+static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
 {
-	pte->val |= (addr & VTD_PAGE_MASK);
+	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
 }
 
 static inline bool dma_pte_present(struct dma_pte *pte)
@@ -217,6 +240,19 @@ static inline bool dma_pte_present(struct dma_pte *pte)
 	return (pte->val & 3) != 0;
 }
 
+static inline int first_pte_in_page(struct dma_pte *pte)
+{
+	return !((unsigned long)pte & ~VTD_PAGE_MASK);
+}
+
+/*
+ * This domain is a statically identity mapping domain.
+ *	1. This domain creats a static 1:1 mapping to all usable memory.
+ *	2. It maps to each iommu if successful.
+ *	3. Each iommu mapps to this domain if successful.
+ */
+struct dmar_domain *si_domain;
+
 /* devices under the same p2p bridge are owned in one domain */
 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
 
@@ -225,6 +261,9 @@ static inline bool dma_pte_present(struct dma_pte *pte)
  */
 #define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)
 
+/* si_domain contains mulitple devices */
+#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 2)
+
 struct dmar_domain {
 	int	id;			/* domain id */
 	unsigned long iommu_bmp;	/* bitmap of iommus this domain uses*/
@@ -233,7 +272,6 @@ struct dmar_domain {
 	struct iova_domain iovad;	/* iova's that belong to this domain */
 
 	struct dma_pte	*pgd;		/* virtual address */
-	spinlock_t	mapping_lock;	/* page table lock */
 	int		gaw;		/* max guest address width */
 
 	/* adjusted guest address width, 0 is level 2 30-bit */
@@ -256,6 +294,7 @@ struct device_domain_info {
 	u8 bus;			/* PCI bus number */
 	u8 devfn;		/* PCI devfn number */
 	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+	struct intel_iommu *iommu; /* IOMMU used by this device */
 	struct dmar_domain *domain; /* pointer to domain */
 };
 
@@ -401,17 +440,13 @@ void free_iova_mem(struct iova *iova)
 
 static inline int width_to_agaw(int width);
 
-/* calculate agaw for each iommu.
- * "SAGAW" may be different across iommus, use a default agaw, and
- * get a supported less agaw for iommus that don't support the default agaw.
- */
-int iommu_calculate_agaw(struct intel_iommu *iommu)
+static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
 {
 	unsigned long sagaw;
 	int agaw = -1;
 
 	sagaw = cap_sagaw(iommu->cap);
-	for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+	for (agaw = width_to_agaw(max_gaw);
 	     agaw >= 0; agaw--) {
 		if (test_bit(agaw, &sagaw))
 			break;
@@ -420,12 +455,32 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
 	return agaw;
 }
 
-/* in native case, each domain is related to only one iommu */
+/*
+ * Calculate max SAGAW for each iommu.
+ */
+int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
+{
+	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
+}
+
+/*
+ * calculate agaw for each iommu.
+ * "SAGAW" may be different across iommus, use a default agaw, and
+ * get a supported less agaw for iommus that don't support the default agaw.
+ */
+int iommu_calculate_agaw(struct intel_iommu *iommu)
+{
+	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+}
+
+/* This functionin only returns single iommu in a domain */
 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 {
 	int iommu_id;
 
+	/* si_domain and vm domain should not get here. */
 	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
+	BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
 
 	iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
 	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
@@ -620,80 +675,78 @@ static inline int width_to_agaw(int width)
 
 static inline unsigned int level_to_offset_bits(int level)
 {
-	return (12 + (level - 1) * LEVEL_STRIDE);
+	return (level - 1) * LEVEL_STRIDE;
 }
 
-static inline int address_level_offset(u64 addr, int level)
+static inline int pfn_level_offset(unsigned long pfn, int level)
 {
-	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
+	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
 }
 
-static inline u64 level_mask(int level)
+static inline unsigned long level_mask(int level)
 {
-	return ((u64)-1 << level_to_offset_bits(level));
+	return -1UL << level_to_offset_bits(level);
 }
 
-static inline u64 level_size(int level)
+static inline unsigned long level_size(int level)
 {
-	return ((u64)1 << level_to_offset_bits(level));
+	return 1UL << level_to_offset_bits(level);
 }
 
-static inline u64 align_to_level(u64 addr, int level)
+static inline unsigned long align_to_level(unsigned long pfn, int level)
 {
-	return ((addr + level_size(level) - 1) & level_mask(level));
+	return (pfn + level_size(level) - 1) & level_mask(level);
 }
 
-static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
+static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
+				      unsigned long pfn)
 {
-	int addr_width = agaw_to_width(domain->agaw);
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 	struct dma_pte *parent, *pte = NULL;
 	int level = agaw_to_level(domain->agaw);
 	int offset;
-	unsigned long flags;
 
 	BUG_ON(!domain->pgd);
-
-	addr &= (((u64)1) << addr_width) - 1;
+	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
 	parent = domain->pgd;
 
-	spin_lock_irqsave(&domain->mapping_lock, flags);
 	while (level > 0) {
 		void *tmp_page;
 
-		offset = address_level_offset(addr, level);
+		offset = pfn_level_offset(pfn, level);
 		pte = &parent[offset];
 		if (level == 1)
 			break;
 
 		if (!dma_pte_present(pte)) {
+			uint64_t pteval;
+
 			tmp_page = alloc_pgtable_page();
 
-			if (!tmp_page) {
-				spin_unlock_irqrestore(&domain->mapping_lock,
-						       flags);
+			if (!tmp_page)
 				return NULL;
+
+			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
+			pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
+			if (cmpxchg64(&pte->val, 0ULL, pteval)) {
+				/* Someone else set it while we were thinking; use theirs. */
+				free_pgtable_page(tmp_page);
+			} else {
+				dma_pte_addr(pte);
+				domain_flush_cache(domain, pte, sizeof(*pte));
 			}
-			domain_flush_cache(domain, tmp_page, PAGE_SIZE);
-			dma_set_pte_addr(pte, virt_to_phys(tmp_page));
-			/*
-			 * high level table always sets r/w, last level page
-			 * table control read/write
-			 */
-			dma_set_pte_readable(pte);
-			dma_set_pte_writable(pte);
-			domain_flush_cache(domain, pte, sizeof(*pte));
 		}
 		parent = phys_to_virt(dma_pte_addr(pte));
 		level--;
 	}
 
-	spin_unlock_irqrestore(&domain->mapping_lock, flags);
 	return pte;
 }
 
 /* return address's pte at specific level */
-static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
-					 int level)
+static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
+					 unsigned long pfn,
+					 int level)
 {
 	struct dma_pte *parent, *pte = NULL;
 	int total = agaw_to_level(domain->agaw);
@@ -701,7 +754,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
 
 	parent = domain->pgd;
 	while (level <= total) {
-		offset = address_level_offset(addr, total);
+		offset = pfn_level_offset(pfn, total);
 		pte = &parent[offset];
 		if (level == total)
 			return pte;
@@ -714,74 +767,82 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
 	return NULL;
 }
 
-/* clear one page's page table */
-static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
-{
-	struct dma_pte *pte = NULL;
-
-	/* get last level pte */
-	pte = dma_addr_level_pte(domain, addr, 1);
-
-	if (pte) {
-		dma_clear_pte(pte);
-		domain_flush_cache(domain, pte, sizeof(*pte));
-	}
-}
-
 /* clear last level pte, a tlb flush should be followed */
-static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
+static void dma_pte_clear_range(struct dmar_domain *domain,
+				unsigned long start_pfn,
+				unsigned long last_pfn)
 {
-	int addr_width = agaw_to_width(domain->agaw);
-	int npages;
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+	struct dma_pte *first_pte, *pte;
 
-	start &= (((u64)1) << addr_width) - 1;
-	end &= (((u64)1) << addr_width) - 1;
-	/* in case it's partial page */
-	start &= PAGE_MASK;
-	end = PAGE_ALIGN(end);
-	npages = (end - start) / VTD_PAGE_SIZE;
+	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
+	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
 
-	/* we don't need lock here, nobody else touches the iova range */
-	while (npages--) {
-		dma_pte_clear_one(domain, start);
-		start += VTD_PAGE_SIZE;
+	/* we don't need lock here; nobody else touches the iova range */
+	while (start_pfn <= last_pfn) {
+		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
+		if (!pte) {
+			start_pfn = align_to_level(start_pfn + 1, 2);
+			continue;
+		}
+		do {
+			dma_clear_pte(pte);
+			start_pfn++;
+			pte++;
+		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
+
+		domain_flush_cache(domain, first_pte,
+				   (void *)pte - (void *)first_pte);
 	}
 }
 
 /* free page table pages. last level pte should already be cleared */
 static void dma_pte_free_pagetable(struct dmar_domain *domain,
-				   u64 start, u64 end)
+				   unsigned long start_pfn,
+				   unsigned long last_pfn)
 {
-	int addr_width = agaw_to_width(domain->agaw);
-	struct dma_pte *pte;
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+	struct dma_pte *first_pte, *pte;
 	int total = agaw_to_level(domain->agaw);
 	int level;
-	u64 tmp;
+	unsigned long tmp;
 
-	start &= (((u64)1) << addr_width) - 1;
-	end &= (((u64)1) << addr_width) - 1;
+	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
+	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
 
-	/* we don't need lock here, nobody else touches the iova range */
+	/* We don't need lock here; nobody else touches the iova range */
 	level = 2;
 	while (level <= total) {
-		tmp = align_to_level(start, level);
-		if (tmp >= end || (tmp + level_size(level) > end))
+		tmp = align_to_level(start_pfn, level);
+
+		/* If we can't even clear one PTE at this level, we're done */
+		if (tmp + level_size(level) - 1 > last_pfn)
 			return;
 
-		while (tmp < end) {
-			pte = dma_addr_level_pte(domain, tmp, level);
-			if (pte) {
-				free_pgtable_page(
-					phys_to_virt(dma_pte_addr(pte)));
-				dma_clear_pte(pte);
-				domain_flush_cache(domain, pte, sizeof(*pte));
+		while (tmp + level_size(level) - 1 <= last_pfn) {
+			first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
+			if (!pte) {
+				tmp = align_to_level(tmp + 1, level + 1);
+				continue;
 			}
-			tmp += level_size(level);
+			do {
+				if (dma_pte_present(pte)) {
+					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
+					dma_clear_pte(pte);
+				}
+				pte++;
+				tmp += level_size(level);
+			} while (!first_pte_in_page(pte) &&
+				 tmp + level_size(level) - 1 <= last_pfn);
+
+			domain_flush_cache(domain, first_pte,
+					   (void *)pte - (void *)first_pte);
+
 		}
 		level++;
 	}
 	/* free pgd */
-	if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
+	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
 		free_pgtable_page(domain->pgd);
 		domain->pgd = NULL;
 	}
@@ -809,7 +870,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 static void iommu_set_root_entry(struct intel_iommu *iommu)
 {
 	void *addr;
-	u32 cmd, sts;
+	u32 sts;
 	unsigned long flag;
 
 	addr = iommu->root_entry;
@@ -817,12 +878,11 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
 	spin_lock_irqsave(&iommu->register_lock, flag);
 	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
 
-	cmd = iommu->gcmd | DMA_GCMD_SRTP;
-	writel(cmd, iommu->reg + DMAR_GCMD_REG);
+	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
 
 	/* Make sure hardware complete it */
 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
-		readl, (sts & DMA_GSTS_RTPS), sts);
+		      readl, (sts & DMA_GSTS_RTPS), sts);
 
 	spin_unlock_irqrestore(&iommu->register_lock, flag);
 }
@@ -834,39 +894,25 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu)
 
 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
 		return;
-	val = iommu->gcmd | DMA_GCMD_WBF;
 
 	spin_lock_irqsave(&iommu->register_lock, flag);
-	writel(val, iommu->reg + DMAR_GCMD_REG);
+	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
 
 	/* Make sure hardware complete it */
 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
-		readl, (!(val & DMA_GSTS_WBFS)), val);
+		      readl, (!(val & DMA_GSTS_WBFS)), val);
 
 	spin_unlock_irqrestore(&iommu->register_lock, flag);
 }
 
 /* return value determine if we need a write buffer flush */
-static int __iommu_flush_context(struct intel_iommu *iommu,
-	u16 did, u16 source_id, u8 function_mask, u64 type,
-	int non_present_entry_flush)
+static void __iommu_flush_context(struct intel_iommu *iommu,
+				  u16 did, u16 source_id, u8 function_mask,
+				  u64 type)
 {
 	u64 val = 0;
 	unsigned long flag;
 
-	/*
-	 * In the non-present entry flush case, if hardware doesn't cache
-	 * non-present entry we do nothing and if hardware cache non-present
-	 * entry, we flush entries of domain 0 (the domain id is used to cache
-	 * any non-present entries)
-	 */
-	if (non_present_entry_flush) {
-		if (!cap_caching_mode(iommu->cap))
-			return 1;
-		else
-			did = 0;
-	}
-
 	switch (type) {
 	case DMA_CCMD_GLOBAL_INVL:
 		val = DMA_CCMD_GLOBAL_INVL;
@@ -891,33 +937,16 @@ static int __iommu_flush_context(struct intel_iommu *iommu,
 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
 
 	spin_unlock_irqrestore(&iommu->register_lock, flag);
-
-	/* flush context entry will implicitly flush write buffer */
-	return 0;
 }
 
 /* return value determine if we need a write buffer flush */
-static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
-	u64 addr, unsigned int size_order, u64 type,
-	int non_present_entry_flush)
+static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
+				u64 addr, unsigned int size_order, u64 type)
 {
 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
 	u64 val = 0, val_iva = 0;
 	unsigned long flag;
 
-	/*
-	 * In the non-present entry flush case, if hardware doesn't cache
-	 * non-present entry we do nothing and if hardware cache non-present
-	 * entry, we flush entries of domain 0 (the domain id is used to cache
-	 * any non-present entries)
-	 */
-	if (non_present_entry_flush) {
-		if (!cap_caching_mode(iommu->cap))
-			return 1;
-		else
-			did = 0;
-	}
-
 	switch (type) {
 	case DMA_TLB_GLOBAL_FLUSH:
 		/* global flush doesn't need set IVA_REG */
@@ -965,37 +994,106 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
 	pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
 		(unsigned long long)DMA_TLB_IIRG(type),
 		(unsigned long long)DMA_TLB_IAIG(val));
-	/* flush iotlb entry will implicitly flush write buffer */
-	return 0;
 }
 
-static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
-	u64 addr, unsigned int pages, int non_present_entry_flush)
+static struct device_domain_info *iommu_support_dev_iotlb(
+	struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
 {
-	unsigned int mask;
+	int found = 0;
+	unsigned long flags;
+	struct device_domain_info *info;
+	struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
 
-	BUG_ON(addr & (~VTD_PAGE_MASK));
-	BUG_ON(pages == 0);
+	if (!ecap_dev_iotlb_support(iommu->ecap))
+		return NULL;
+
+	if (!iommu->qi)
+		return NULL;
+
+	spin_lock_irqsave(&device_domain_lock, flags);
+	list_for_each_entry(info, &domain->devices, link)
+		if (info->bus == bus && info->devfn == devfn) {
+			found = 1;
+			break;
+		}
+	spin_unlock_irqrestore(&device_domain_lock, flags);
+
+	if (!found || !info->dev)
+		return NULL;
+
+	if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
+		return NULL;
+
+	if (!dmar_find_matched_atsr_unit(info->dev))
+		return NULL;
+
+	info->iommu = iommu;
+
+	return info;
+}
+
+static void iommu_enable_dev_iotlb(struct device_domain_info *info)
+{
+	if (!info)
+		return;
+
+	pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
+}
+
+static void iommu_disable_dev_iotlb(struct device_domain_info *info)
+{
+	if (!info->dev || !pci_ats_enabled(info->dev))
+		return;
+
+	pci_disable_ats(info->dev);
+}
+
+static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
+				  u64 addr, unsigned mask)
+{
+	u16 sid, qdep;
+	unsigned long flags;
+	struct device_domain_info *info;
+
+	spin_lock_irqsave(&device_domain_lock, flags);
+	list_for_each_entry(info, &domain->devices, link) {
+		if (!info->dev || !pci_ats_enabled(info->dev))
+			continue;
+
+		sid = info->bus << 8 | info->devfn;
+		qdep = pci_ats_queue_depth(info->dev);
+		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
+	}
+	spin_unlock_irqrestore(&device_domain_lock, flags);
+}
 
-	/* Fallback to domain selective flush if no PSI support */
-	if (!cap_pgsel_inv(iommu->cap))
-		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
-						DMA_TLB_DSI_FLUSH,
-						non_present_entry_flush);
+static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
+				  unsigned long pfn, unsigned int pages)
+{
+	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
+	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
+
+	BUG_ON(pages == 0);
 
 	/*
+	 * Fallback to domain selective flush if no PSI support or the size is
+	 * too big.
 	 * PSI requires page size to be 2 ^ x, and the base address is naturally
 	 * aligned to the size
 	 */
-	mask = ilog2(__roundup_pow_of_two(pages));
-	/* Fallback to domain selective flush if size is too big */
-	if (mask > cap_max_amask_val(iommu->cap))
-		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
-			DMA_TLB_DSI_FLUSH, non_present_entry_flush);
+	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
+		iommu->flush.flush_iotlb(iommu, did, 0, 0,
+						DMA_TLB_DSI_FLUSH);
+	else
+		iommu->flush.flush_iotlb(iommu, did, addr, mask,
+						DMA_TLB_PSI_FLUSH);
 
-	return iommu->flush.flush_iotlb(iommu, did, addr, mask,
-					DMA_TLB_PSI_FLUSH,
-					non_present_entry_flush);
+	/*
+	 * In caching mode, domain ID 0 is reserved for non-present to present
+	 * mapping flush. Device IOTLB doesn't need to be flushed in this case.
+	 */
+	if (!cap_caching_mode(iommu->cap) || did)
+		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
 }
 
 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
@@ -1021,13 +1119,13 @@ static int iommu_enable_translation(struct intel_iommu *iommu)
 	unsigned long flags;
 
 	spin_lock_irqsave(&iommu->register_lock, flags);
-	writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
+	iommu->gcmd |= DMA_GCMD_TE;
+	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
 
 	/* Make sure hardware complete it */
 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
-		readl, (sts & DMA_GSTS_TES), sts);
+		      readl, (sts & DMA_GSTS_TES), sts);
 
-	iommu->gcmd |= DMA_GCMD_TE;
 	spin_unlock_irqrestore(&iommu->register_lock, flags);
 	return 0;
 }
@@ -1043,7 +1141,7 @@ static int iommu_disable_translation(struct intel_iommu *iommu)
 
 	/* Make sure hardware complete it */
 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
-		readl, (!(sts & DMA_GSTS_TES)), sts);
+		      readl, (!(sts & DMA_GSTS_TES)), sts);
 
 	spin_unlock_irqrestore(&iommu->register_lock, flag);
 	return 0;
@@ -1142,48 +1240,71 @@ void free_dmar_iommu(struct intel_iommu *iommu)
 	free_context_table(iommu);
 }
 
-static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
+static struct dmar_domain *alloc_domain(void)
 {
-	unsigned long num;
-	unsigned long ndomains;
 	struct dmar_domain *domain;
-	unsigned long flags;
 
 	domain = alloc_domain_mem();
 	if (!domain)
 		return NULL;
 
+	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
+	domain->flags = 0;
+
+	return domain;
+}
+
+static int iommu_attach_domain(struct dmar_domain *domain,
+			       struct intel_iommu *iommu)
+{
+	int num;
+	unsigned long ndomains;
+	unsigned long flags;
+
 	ndomains = cap_ndoms(iommu->cap);
 
 	spin_lock_irqsave(&iommu->lock, flags);
+
 	num = find_first_zero_bit(iommu->domain_ids, ndomains);
 	if (num >= ndomains) {
 		spin_unlock_irqrestore(&iommu->lock, flags);
-		free_domain_mem(domain);
 		printk(KERN_ERR "IOMMU: no free domain ids\n");
-		return NULL;
+		return -ENOMEM;
 	}
 
-	set_bit(num, iommu->domain_ids);
 	domain->id = num;
-	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
+	set_bit(num, iommu->domain_ids);
 	set_bit(iommu->seq_id, &domain->iommu_bmp);
-	domain->flags = 0;
 	iommu->domains[num] = domain;
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	return domain;
+	return 0;
 }
 
-static void iommu_free_domain(struct dmar_domain *domain)
+static void iommu_detach_domain(struct dmar_domain *domain,
+				struct intel_iommu *iommu)
 {
 	unsigned long flags;
-	struct intel_iommu *iommu;
-
-	iommu = domain_get_iommu(domain);
+	int num, ndomains;
+	int found = 0;
 
 	spin_lock_irqsave(&iommu->lock, flags);
-	clear_bit(domain->id, iommu->domain_ids);
+	ndomains = cap_ndoms(iommu->cap);
+	num = find_first_bit(iommu->domain_ids, ndomains);
+	for (; num < ndomains; ) {
+		if (iommu->domains[num] == domain) {
+			found = 1;
+			break;
+		}
+		num = find_next_bit(iommu->domain_ids,
+				    cap_ndoms(iommu->cap), num+1);
+	}
+
+	if (found) {
+		clear_bit(num, iommu->domain_ids);
+		clear_bit(iommu->seq_id, &domain->iommu_bmp);
+		iommu->domains[num] = NULL;
+	}
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -1196,7 +1317,6 @@ static void dmar_init_reserved_ranges(void)
 	struct pci_dev *pdev = NULL;
 	struct iova *iova;
 	int i;
-	u64 addr, size;
 
 	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
 
@@ -1219,12 +1339,9 @@ static void dmar_init_reserved_ranges(void)
 			r = &pdev->resource[i];
 			if (!r->flags || !(r->flags & IORESOURCE_MEM))
 				continue;
-			addr = r->start;
-			addr &= PHYSICAL_PAGE_MASK;
-			size = r->end - addr;
-			size = PAGE_ALIGN(size);
-			iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
-				IOVA_PFN(size + addr) - 1);
+			iova = reserve_iova(&reserved_iova_list,
+					    IOVA_PFN(r->start),
+					    IOVA_PFN(r->end));
 			if (!iova)
 				printk(KERN_ERR "Reserve iova failed\n");
 		}
@@ -1258,7 +1375,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 	unsigned long sagaw;
 
 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
-	spin_lock_init(&domain->mapping_lock);
 	spin_lock_init(&domain->iommu_lock);
 
 	domain_reserve_special_ranges(domain);
@@ -1303,7 +1419,8 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 
 static void domain_exit(struct dmar_domain *domain)
 {
-	u64 end;
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu;
 
 	/* Domain 0 is reserved, so dont process it */
 	if (!domain)
@@ -1312,21 +1429,22 @@ static void domain_exit(struct dmar_domain *domain)
 	domain_remove_dev_info(domain);
 	/* destroy iovas */
 	put_iova_domain(&domain->iovad);
-	end = DOMAIN_MAX_ADDR(domain->gaw);
-	end = end & (~PAGE_MASK);
 
 	/* clear ptes */
-	dma_pte_clear_range(domain, 0, end);
+	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
 
 	/* free page tables */
-	dma_pte_free_pagetable(domain, 0, end);
+	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
+
+	for_each_active_iommu(iommu, drhd)
+		if (test_bit(iommu->seq_id, &domain->iommu_bmp))
+			iommu_detach_domain(domain, iommu);
 
-	iommu_free_domain(domain);
 	free_domain_mem(domain);
 }
 
-static int domain_context_mapping_one(struct dmar_domain *domain,
-				      int segment, u8 bus, u8 devfn)
+static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
+				      u8 bus, u8 devfn, int translation)
 {
 	struct context_entry *context;
 	unsigned long flags;
@@ -1336,10 +1454,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	unsigned long ndomains;
 	int id;
 	int agaw;
+	struct device_domain_info *info = NULL;
 
 	pr_debug("Set context mapping for %02x:%02x.%d\n",
 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
 	BUG_ON(!domain->pgd);
+	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
+	       translation != CONTEXT_TT_MULTI_LEVEL);
 
 	iommu = device_to_iommu(segment, bus, devfn);
 	if (!iommu)
@@ -1357,7 +1479,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	id = domain->id;
 	pgd = domain->pgd;
 
-	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
+	    domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
 		int found = 0;
 
 		/* find an available domain id for this device in iommu */
@@ -1382,6 +1505,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 			}
 
 			set_bit(num, iommu->domain_ids);
+			set_bit(iommu->seq_id, &domain->iommu_bmp);
 			iommu->domains[num] = domain;
 			id = num;
 		}
@@ -1399,21 +1523,44 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	}
 
 	context_set_domain_id(context, id);
-	context_set_address_width(context, iommu->agaw);
-	context_set_address_root(context, virt_to_phys(pgd));
-	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
+
+	if (translation != CONTEXT_TT_PASS_THROUGH) {
+		info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
+		translation = info ? CONTEXT_TT_DEV_IOTLB :
+				     CONTEXT_TT_MULTI_LEVEL;
+	}
+	/*
+	 * In pass through mode, AW must be programmed to indicate the largest
+	 * AGAW value supported by hardware. And ASR is ignored by hardware.
+	 */
+	if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
+		context_set_address_width(context, iommu->msagaw);
+	else {
+		context_set_address_root(context, virt_to_phys(pgd));
+		context_set_address_width(context, iommu->agaw);
+	}
+
+	context_set_translation_type(context, translation);
 	context_set_fault_enable(context);
 	context_set_present(context);
 	domain_flush_cache(domain, context, sizeof(*context));
 
-	/* it's a non-present to present mapping */
-	if (iommu->flush.flush_context(iommu, domain->id,
-		(((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
-		DMA_CCMD_DEVICE_INVL, 1))
+	/*
+	 * It's a non-present to present mapping. If hardware doesn't cache
+	 * non-present entry we only need to flush the write-buffer. If the
+	 * _does_ cache non-present entries, then it does so in the special
+	 * domain #0, which we have to flush:
+	 */
+	if (cap_caching_mode(iommu->cap)) {
+		iommu->flush.flush_context(iommu, 0,
+					   (((u16)bus) << 8) | devfn,
+					   DMA_CCMD_MASK_NOBIT,
+					   DMA_CCMD_DEVICE_INVL);
+		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
+	} else {
 		iommu_flush_write_buffer(iommu);
-	else
-		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
-
+	}
+	iommu_enable_dev_iotlb(info);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	spin_lock_irqsave(&domain->iommu_lock, flags);
@@ -1426,13 +1573,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 }
 
 static int
-domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
+domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
+		       int translation)
 {
 	int ret;
 	struct pci_dev *tmp, *parent;
 
 	ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
-					 pdev->bus->number, pdev->devfn);
+					 pdev->bus->number, pdev->devfn,
+					 translation);
 	if (ret)
 		return ret;
 
@@ -1446,7 +1595,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
 		ret = domain_context_mapping_one(domain,
 						 pci_domain_nr(parent->bus),
 						 parent->bus->number,
-						 parent->devfn);
+						 parent->devfn, translation);
 		if (ret)
 			return ret;
 		parent = parent->bus->self;
@@ -1454,12 +1603,14 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
 	if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
 		return domain_context_mapping_one(domain,
 					pci_domain_nr(tmp->subordinate),
-					tmp->subordinate->number, 0);
+					tmp->subordinate->number, 0,
+					translation);
 	else /* this is a legacy PCI bridge */
 		return domain_context_mapping_one(domain,
 					pci_domain_nr(tmp->bus),
 					tmp->bus->number,
-					tmp->devfn);
+					tmp->devfn,
+					translation);
 }
 
 static int domain_context_mapped(struct pci_dev *pdev)
@@ -1497,42 +1648,86 @@ static int domain_context_mapped(struct pci_dev *pdev)
 					     tmp->devfn);
 }
 
-static int
-domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
-		    u64 hpa, size_t size, int prot)
+static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+			    struct scatterlist *sg, unsigned long phys_pfn,
+			    unsigned long nr_pages, int prot)
 {
-	u64 start_pfn, end_pfn;
-	struct dma_pte *pte;
-	int index;
-	int addr_width = agaw_to_width(domain->agaw);
+	struct dma_pte *first_pte = NULL, *pte = NULL;
+	phys_addr_t uninitialized_var(pteval);
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+	unsigned long sg_res;
 
-	hpa &= (((u64)1) << addr_width) - 1;
+	BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
 
 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
 		return -EINVAL;
-	iova &= PAGE_MASK;
-	start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
-	end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
-	index = 0;
-	while (start_pfn < end_pfn) {
-		pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
-		if (!pte)
-			return -ENOMEM;
+
+	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
+
+	if (sg)
+		sg_res = 0;
+	else {
+		sg_res = nr_pages + 1;
+		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
+	}
+
+	while (nr_pages--) {
+		uint64_t tmp;
+
+		if (!sg_res) {
+			sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;
+			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
+			sg->dma_length = sg->length;
+			pteval = page_to_phys(sg_page(sg)) | prot;
+		}
+		if (!pte) {
+			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
+			if (!pte)
+				return -ENOMEM;
+		}
 		/* We don't need lock here, nobody else
 		 * touches the iova range
 		 */
-		BUG_ON(dma_pte_addr(pte));
-		dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
-		dma_set_pte_prot(pte, prot);
-		if (prot & DMA_PTE_SNP)
-			dma_set_pte_snp(pte);
-		domain_flush_cache(domain, pte, sizeof(*pte));
-		start_pfn++;
-		index++;
+		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
+		if (tmp) {
+			static int dumps = 5;
+			printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
+			       iov_pfn, tmp, (unsigned long long)pteval);
+			if (dumps) {
+				dumps--;
+				debug_dma_dump_mappings(NULL);
+			}
+			WARN_ON(1);
+		}
+		pte++;
+		if (!nr_pages || first_pte_in_page(pte)) {
+			domain_flush_cache(domain, first_pte,
+					   (void *)pte - (void *)first_pte);
+			pte = NULL;
+		}
+		iov_pfn++;
+		pteval += VTD_PAGE_SIZE;
+		sg_res--;
+		if (!sg_res)
+			sg = sg_next(sg);
 	}
 	return 0;
 }
 
+static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+				    struct scatterlist *sg, unsigned long nr_pages,
+				    int prot)
+{
+	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
+}
+
+static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+				     unsigned long phys_pfn, unsigned long nr_pages,
+				     int prot)
+{
+	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
+}
+
 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
 {
 	if (!iommu)
@@ -1540,9 +1735,8 @@ static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
 
 	clear_context_table(iommu, bus, devfn);
 	iommu->flush.flush_context(iommu, 0, 0, 0,
-					   DMA_CCMD_GLOBAL_INVL, 0);
-	iommu->flush.flush_iotlb(iommu, 0, 0, 0,
-				 DMA_TLB_GLOBAL_FLUSH, 0);
+					   DMA_CCMD_GLOBAL_INVL);
+	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
 }
 
 static void domain_remove_dev_info(struct dmar_domain *domain)
@@ -1561,6 +1755,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
 			info->dev->dev.archdata.iommu = NULL;
 		spin_unlock_irqrestore(&device_domain_lock, flags);
 
+		iommu_disable_dev_iotlb(info);
 		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
 		iommu_detach_dev(iommu, info->bus, info->devfn);
 		free_devinfo_mem(info);
@@ -1597,6 +1792,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 	unsigned long flags;
 	int bus = 0, devfn = 0;
 	int segment;
+	int ret;
 
 	domain = find_domain(pdev);
 	if (domain)
@@ -1629,6 +1825,10 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 		}
 	}
 
+	domain = alloc_domain();
+	if (!domain)
+		goto error;
+
 	/* Allocate new domain for the device */
 	drhd = dmar_find_matched_drhd_unit(pdev);
 	if (!drhd) {
@@ -1638,9 +1838,11 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 	}
 	iommu = drhd->iommu;
 
-	domain = iommu_alloc_domain(iommu);
-	if (!domain)
+	ret = iommu_attach_domain(domain, iommu);
+	if (ret) {
+		domain_exit(domain);
 		goto error;
+	}
 
 	if (domain_init(domain, gaw)) {
@@ -1714,55 +1916,63 @@ error:
 	return find_domain(pdev);
 }
 
+static int iommu_identity_mapping;
+
+static int iommu_domain_identity_map(struct dmar_domain *domain,
+				     unsigned long long start,
+				     unsigned long long end)
+{
+	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
+	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
+
+	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
+			  dma_to_mm_pfn(last_vpfn))) {
+		printk(KERN_ERR "IOMMU: reserve iova failed\n");
+		return -ENOMEM;
+	}
+
+	pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
+		 start, end, domain->id);
+	/*
+	 * RMRR range might have overlap with physical memory range,
+	 * clear it first
+	 */
+	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
+
+	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
+				  last_vpfn - first_vpfn + 1,
+				  DMA_PTE_READ|DMA_PTE_WRITE);
+}
+
 static int iommu_prepare_identity_map(struct pci_dev *pdev,
 				      unsigned long long start,
 				      unsigned long long end)
 {
 	struct dmar_domain *domain;
-	unsigned long size;
-	unsigned long long base;
 	int ret;
 
 	printk(KERN_INFO
 	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
 	       pci_name(pdev), start, end);
-	/* page table init */
+
 	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
 	if (!domain)
 		return -ENOMEM;
 
-	/* The address might not be aligned */
-	base = start & PAGE_MASK;
-	size = end - base;
-	size = PAGE_ALIGN(size);
-	if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
-			IOVA_PFN(base + size) - 1)) {
-		printk(KERN_ERR "IOMMU: reserve iova failed\n");
-		ret = -ENOMEM;
+	ret = iommu_domain_identity_map(domain, start, end);
+	if (ret)
 		goto error;
-	}
 
-	pr_debug("Mapping reserved region %lx@%llx for %s\n",
-			size, base, pci_name(pdev));
-	/*
-	 * RMRR range might have overlap with physical memory range,
-	 * clear it first
-	 */
-	dma_pte_clear_range(domain, base, base + size);
-
-	ret = domain_page_mapping(domain, base, base, size,
-			DMA_PTE_READ|DMA_PTE_WRITE);
+	/* context entry init */
+	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
 	if (ret)
 		goto error;
 
-	/* context entry init */
-	ret = domain_context_mapping(domain, pdev);
-	if (!ret)
-		return 0;
-error:
+	return 0;
+
+ error:
 	domain_exit(domain);
 	return ret;
-
 }
 
 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
@@ -1774,96 +1984,179 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, | |||
1774 | rmrr->end_address + 1); | 1984 | rmrr->end_address + 1); |
1775 | } | 1985 | } |
1776 | 1986 | ||
1777 | #ifdef CONFIG_DMAR_GFX_WA | 1987 | #ifdef CONFIG_DMAR_FLOPPY_WA |
1778 | struct iommu_prepare_data { | 1988 | static inline void iommu_prepare_isa(void) |
1989 | { | ||
1779 | struct pci_dev *pdev; | 1990 | struct pci_dev *pdev; |
1780 | int ret; | 1991 | int ret; |
1781 | }; | ||
1782 | 1992 | ||
1783 | static int __init iommu_prepare_work_fn(unsigned long start_pfn, | 1993 | pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); |
1784 | unsigned long end_pfn, void *datax) | 1994 | if (!pdev) |
1785 | { | 1995 | return; |
1786 | struct iommu_prepare_data *data; | ||
1787 | 1996 | ||
1788 | data = (struct iommu_prepare_data *)datax; | 1997 | printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); |
1998 | ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); | ||
1789 | 1999 | ||
1790 | data->ret = iommu_prepare_identity_map(data->pdev, | 2000 | if (ret) |
1791 | start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | 2001 | printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " |
1792 | return data->ret; | 2002 | "floppy might not work\n"); |
1793 | 2003 | ||
1794 | } | 2004 | } |
1795 | 2005 | #else | |
1796 | static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev) | 2006 | static inline void iommu_prepare_isa(void) |
1797 | { | 2007 | { |
1798 | int nid; | 2008 | return; |
1799 | struct iommu_prepare_data data; | ||
1800 | |||
1801 | data.pdev = pdev; | ||
1802 | data.ret = 0; | ||
1803 | |||
1804 | for_each_online_node(nid) { | ||
1805 | work_with_active_regions(nid, iommu_prepare_work_fn, &data); | ||
1806 | if (data.ret) | ||
1807 | return data.ret; | ||
1808 | } | ||
1809 | return data.ret; | ||
1810 | } | 2009 | } |
2010 | #endif /* !CONFIG_DMAR_FLOPPY_WA */ | ||
1811 | 2011 | ||
1812 | static void __init iommu_prepare_gfx_mapping(void) | 2012 | /* Initialize each context entry as pass through.*/ |
2013 | static int __init init_context_pass_through(void) | ||
1813 | { | 2014 | { |
1814 | struct pci_dev *pdev = NULL; | 2015 | struct pci_dev *pdev = NULL; |
2016 | struct dmar_domain *domain; | ||
1815 | int ret; | 2017 | int ret; |
1816 | 2018 | ||
1817 | for_each_pci_dev(pdev) { | 2019 | for_each_pci_dev(pdev) { |
1818 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO || | 2020 | domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); |
1819 | !IS_GFX_DEVICE(pdev)) | 2021 | ret = domain_context_mapping(domain, pdev, |
1820 | continue; | 2022 | CONTEXT_TT_PASS_THROUGH); |
1821 | printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n", | ||
1822 | pci_name(pdev)); | ||
1823 | ret = iommu_prepare_with_active_regions(pdev); | ||
1824 | if (ret) | 2023 | if (ret) |
1825 | printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); | 2024 | return ret; |
1826 | } | 2025 | } |
2026 | return 0; | ||
1827 | } | 2027 | } |
1828 | #else /* !CONFIG_DMAR_GFX_WA */ | 2028 | |
1829 | static inline void iommu_prepare_gfx_mapping(void) | 2029 | static int md_domain_init(struct dmar_domain *domain, int guest_width); |
2030 | |||
2031 | static int __init si_domain_work_fn(unsigned long start_pfn, | ||
2032 | unsigned long end_pfn, void *datax) | ||
1830 | { | 2033 | { |
1831 | return; | 2034 | int *ret = datax; |
2035 | |||
2036 | *ret = iommu_domain_identity_map(si_domain, | ||
2037 | (uint64_t)start_pfn << PAGE_SHIFT, | ||
2038 | (uint64_t)end_pfn << PAGE_SHIFT); | ||
2039 | return *ret; | ||
2040 | |||
1832 | } | 2041 | } |
1833 | #endif | ||
1834 | 2042 | ||
1835 | #ifdef CONFIG_DMAR_FLOPPY_WA | 2043 | static int si_domain_init(void) |
1836 | static inline void iommu_prepare_isa(void) | ||
1837 | { | 2044 | { |
1838 | struct pci_dev *pdev; | 2045 | struct dmar_drhd_unit *drhd; |
1839 | int ret; | 2046 | struct intel_iommu *iommu; |
2047 | int nid, ret = 0; | ||
1840 | 2048 | ||
1841 | pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); | 2049 | si_domain = alloc_domain(); |
1842 | if (!pdev) | 2050 | if (!si_domain) |
1843 | return; | 2051 | return -EFAULT; |
1844 | 2052 | ||
1845 | printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n"); | 2053 | pr_debug("Identity mapping domain is domain %d\n", si_domain->id); |
1846 | ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); | ||
1847 | 2054 | ||
1848 | if (ret) | 2055 | for_each_active_iommu(iommu, drhd) { |
1849 | printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, " | 2056 | ret = iommu_attach_domain(si_domain, iommu); |
1850 | "floppy might not work\n"); | 2057 | if (ret) { |
2058 | domain_exit(si_domain); | ||
2059 | return -EFAULT; | ||
2060 | } | ||
2061 | } | ||
2062 | |||
2063 | if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { | ||
2064 | domain_exit(si_domain); | ||
2065 | return -EFAULT; | ||
2066 | } | ||
1851 | 2067 | ||
2068 | si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; | ||
2069 | |||
2070 | for_each_online_node(nid) { | ||
2071 | work_with_active_regions(nid, si_domain_work_fn, &ret); | ||
2072 | if (ret) | ||
2073 | return ret; | ||
2074 | } | ||
2075 | |||
2076 | return 0; | ||
1852 | } | 2077 | } |
1853 | #else | 2078 | |
1854 | static inline void iommu_prepare_isa(void) | 2079 | static void domain_remove_one_dev_info(struct dmar_domain *domain, |
2080 | struct pci_dev *pdev); | ||
2081 | static int identity_mapping(struct pci_dev *pdev) | ||
1855 | { | 2082 | { |
1856 | return; | 2083 | struct device_domain_info *info; |
2084 | |||
2085 | if (likely(!iommu_identity_mapping)) | ||
2086 | return 0; | ||
2087 | |||
2088 | |||
2089 | list_for_each_entry(info, &si_domain->devices, link) | ||
2090 | if (info->dev == pdev) | ||
2091 | return 1; | ||
2092 | return 0; | ||
2093 | } | ||
2094 | |||
2095 | static int domain_add_dev_info(struct dmar_domain *domain, | ||
2096 | struct pci_dev *pdev) | ||
2097 | { | ||
2098 | struct device_domain_info *info; | ||
2099 | unsigned long flags; | ||
2100 | |||
2101 | info = alloc_devinfo_mem(); | ||
2102 | if (!info) | ||
2103 | return -ENOMEM; | ||
2104 | |||
2105 | info->segment = pci_domain_nr(pdev->bus); | ||
2106 | info->bus = pdev->bus->number; | ||
2107 | info->devfn = pdev->devfn; | ||
2108 | info->dev = pdev; | ||
2109 | info->domain = domain; | ||
2110 | |||
2111 | spin_lock_irqsave(&device_domain_lock, flags); | ||
2112 | list_add(&info->link, &domain->devices); | ||
2113 | list_add(&info->global, &device_domain_list); | ||
2114 | pdev->dev.archdata.iommu = info; | ||
2115 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
2116 | |||
2117 | return 0; | ||
2118 | } | ||
2119 | |||
2120 | static int iommu_prepare_static_identity_mapping(void) | ||
2121 | { | ||
2122 | struct pci_dev *pdev = NULL; | ||
2123 | int ret; | ||
2124 | |||
2125 | ret = si_domain_init(); | ||
2126 | if (ret) | ||
2127 | return -EFAULT; | ||
2128 | |||
2129 | for_each_pci_dev(pdev) { | ||
2130 | printk(KERN_INFO "IOMMU: identity mapping for device %s\n", | ||
2131 | pci_name(pdev)); | ||
2132 | |||
2133 | ret = domain_context_mapping(si_domain, pdev, | ||
2134 | CONTEXT_TT_MULTI_LEVEL); | ||
2135 | if (ret) | ||
2136 | return ret; | ||
2137 | ret = domain_add_dev_info(si_domain, pdev); | ||
2138 | if (ret) | ||
2139 | return ret; | ||
2140 | } | ||
2141 | |||
2142 | return 0; | ||
1857 | } | 2143 | } |
1858 | #endif /* !CONFIG_DMAR_FLPY_WA */ | ||
1859 | 2144 | ||
1860 | static int __init init_dmars(void) | 2145 | int __init init_dmars(void) |
1861 | { | 2146 | { |
1862 | struct dmar_drhd_unit *drhd; | 2147 | struct dmar_drhd_unit *drhd; |
1863 | struct dmar_rmrr_unit *rmrr; | 2148 | struct dmar_rmrr_unit *rmrr; |
1864 | struct pci_dev *pdev; | 2149 | struct pci_dev *pdev; |
1865 | struct intel_iommu *iommu; | 2150 | struct intel_iommu *iommu; |
1866 | int i, ret; | 2151 | int i, ret; |
2152 | int pass_through = 1; | ||
2153 | |||
2154 | /* | ||
2155 | * In case pass through cannot be enabled, the iommu tries to use identity | ||
2156 | * mapping. | ||
2157 | */ | ||
2158 | if (iommu_pass_through) | ||
2159 | iommu_identity_mapping = 1; | ||
1867 | 2160 | ||
1868 | /* | 2161 | /* |
1869 | * for each drhd | 2162 | * for each drhd |
@@ -1917,7 +2210,15 @@ static int __init init_dmars(void) | |||
1917 | printk(KERN_ERR "IOMMU: allocate root entry failed\n"); | 2210 | printk(KERN_ERR "IOMMU: allocate root entry failed\n"); |
1918 | goto error; | 2211 | goto error; |
1919 | } | 2212 | } |
2213 | if (!ecap_pass_through(iommu->ecap)) | ||
2214 | pass_through = 0; | ||
1920 | } | 2215 | } |
2216 | if (iommu_pass_through) | ||
2217 | if (!pass_through) { | ||
2218 | printk(KERN_INFO | ||
2219 | "Pass Through is not supported by hardware.\n"); | ||
2220 | iommu_pass_through = 0; | ||
2221 | } | ||
1921 | 2222 | ||
1922 | /* | 2223 | /* |
1923 | * Start from the sane iommu hardware state. | 2224 | * Start from the sane iommu hardware state. |
@@ -1973,35 +2274,58 @@ static int __init init_dmars(void) | |||
1973 | } | 2274 | } |
1974 | 2275 | ||
1975 | /* | 2276 | /* |
1976 | * For each rmrr | 2277 | * If pass through is set and enabled, context entries of all PCI |
1977 | * for each dev attached to rmrr | 2278 | * devices are initialized with the pass through translation type. |
1978 | * do | ||
1979 | * locate drhd for dev, alloc domain for dev | ||
1980 | * allocate free domain | ||
1981 | * allocate page table entries for rmrr | ||
1982 | * if context not allocated for bus | ||
1983 | * allocate and init context | ||
1984 | * set present in root table for this bus | ||
1985 | * init context with domain, translation etc | ||
1986 | * endfor | ||
1987 | * endfor | ||
1988 | */ | 2279 | */ |
1989 | for_each_rmrr_units(rmrr) { | 2280 | if (iommu_pass_through) { |
1990 | for (i = 0; i < rmrr->devices_cnt; i++) { | 2281 | ret = init_context_pass_through(); |
1991 | pdev = rmrr->devices[i]; | 2282 | if (ret) { |
1992 | /* some BIOS lists non-exist devices in DMAR table */ | 2283 | printk(KERN_ERR "IOMMU: Pass through init failed.\n"); |
1993 | if (!pdev) | 2284 | iommu_pass_through = 0; |
1994 | continue; | ||
1995 | ret = iommu_prepare_rmrr_dev(rmrr, pdev); | ||
1996 | if (ret) | ||
1997 | printk(KERN_ERR | ||
1998 | "IOMMU: mapping reserved region failed\n"); | ||
1999 | } | 2285 | } |
2000 | } | 2286 | } |
2001 | 2287 | ||
2002 | iommu_prepare_gfx_mapping(); | 2288 | /* |
2289 | * If pass through is not set or not enabled, setup context entries for | ||
2290 | * identity mappings for rmrr, gfx, and isa and may fall back to static | ||
2291 | * identity mapping if iommu_identity_mapping is set. | ||
2292 | */ | ||
2293 | if (!iommu_pass_through) { | ||
2294 | if (iommu_identity_mapping) | ||
2295 | iommu_prepare_static_identity_mapping(); | ||
2296 | /* | ||
2297 | * For each rmrr | ||
2298 | * for each dev attached to rmrr | ||
2299 | * do | ||
2300 | * locate drhd for dev, alloc domain for dev | ||
2301 | * allocate free domain | ||
2302 | * allocate page table entries for rmrr | ||
2303 | * if context not allocated for bus | ||
2304 | * allocate and init context | ||
2305 | * set present in root table for this bus | ||
2306 | * init context with domain, translation etc | ||
2307 | * endfor | ||
2308 | * endfor | ||
2309 | */ | ||
2310 | printk(KERN_INFO "IOMMU: Setting RMRR:\n"); | ||
2311 | for_each_rmrr_units(rmrr) { | ||
2312 | for (i = 0; i < rmrr->devices_cnt; i++) { | ||
2313 | pdev = rmrr->devices[i]; | ||
2314 | /* | ||
2315 | * some BIOS lists non-exist devices in DMAR | ||
2316 | * table. | ||
2317 | */ | ||
2318 | if (!pdev) | ||
2319 | continue; | ||
2320 | ret = iommu_prepare_rmrr_dev(rmrr, pdev); | ||
2321 | if (ret) | ||
2322 | printk(KERN_ERR | ||
2323 | "IOMMU: mapping reserved region failed\n"); | ||
2324 | } | ||
2325 | } | ||
2003 | 2326 | ||
2004 | iommu_prepare_isa(); | 2327 | iommu_prepare_isa(); |
2328 | } | ||
2005 | 2329 | ||
2006 | /* | 2330 | /* |
2007 | * for each drhd | 2331 | * for each drhd |
@@ -2023,10 +2347,8 @@ static int __init init_dmars(void) | |||
2023 | 2347 | ||
2024 | iommu_set_root_entry(iommu); | 2348 | iommu_set_root_entry(iommu); |
2025 | 2349 | ||
2026 | iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, | 2350 | iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); |
2027 | 0); | 2351 | iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); |
2028 | iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, | ||
2029 | 0); | ||
2030 | iommu_disable_protect_mem_regions(iommu); | 2352 | iommu_disable_protect_mem_regions(iommu); |
2031 | 2353 | ||
2032 | ret = iommu_enable_translation(iommu); | 2354 | ret = iommu_enable_translation(iommu); |
@@ -2046,50 +2368,40 @@ error: | |||
2046 | return ret; | 2368 | return ret; |
2047 | } | 2369 | } |
2048 | 2370 | ||
2049 | static inline u64 aligned_size(u64 host_addr, size_t size) | 2371 | static inline unsigned long aligned_nrpages(unsigned long host_addr, |
2050 | { | 2372 | size_t size) |
2051 | u64 addr; | ||
2052 | addr = (host_addr & (~PAGE_MASK)) + size; | ||
2053 | return PAGE_ALIGN(addr); | ||
2054 | } | ||
2055 | |||
2056 | struct iova * | ||
2057 | iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end) | ||
2058 | { | 2373 | { |
2059 | struct iova *piova; | 2374 | host_addr &= ~PAGE_MASK; |
2060 | 2375 | host_addr += size + PAGE_SIZE - 1; | |
2061 | /* Make sure it's in range */ | ||
2062 | end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end); | ||
2063 | if (!size || (IOVA_START_ADDR + size > end)) | ||
2064 | return NULL; | ||
2065 | 2376 | ||
2066 | piova = alloc_iova(&domain->iovad, | 2377 | return host_addr >> VTD_PAGE_SHIFT; |
2067 | size >> PAGE_SHIFT, IOVA_PFN(end), 1); | ||
2068 | return piova; | ||
2069 | } | 2378 | } |
2070 | 2379 | ||
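A small worked example of aligned_nrpages(), assuming PAGE_SIZE == VTD_PAGE_SIZE == 4KiB; the input values are illustrative:

unsigned long host_addr = 0x1003, size = 0x2000;
unsigned long offset  = host_addr & ~PAGE_MASK;                  /* 0x003 */
unsigned long nrpages = (offset + size + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;
/* nrpages == 3: bytes 0x1003..0x3002 touch VT-d pages 0x1, 0x2 and 0x3 */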
2071 | static struct iova * | 2380 | static struct iova *intel_alloc_iova(struct device *dev, |
2072 | __intel_alloc_iova(struct device *dev, struct dmar_domain *domain, | 2381 | struct dmar_domain *domain, |
2073 | size_t size, u64 dma_mask) | 2382 | unsigned long nrpages, uint64_t dma_mask) |
2074 | { | 2383 | { |
2075 | struct pci_dev *pdev = to_pci_dev(dev); | 2384 | struct pci_dev *pdev = to_pci_dev(dev); |
2076 | struct iova *iova = NULL; | 2385 | struct iova *iova = NULL; |
2077 | 2386 | ||
2078 | if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac) | 2387 | /* Restrict dma_mask to the width that the iommu can handle */ |
2079 | iova = iommu_alloc_iova(domain, size, dma_mask); | 2388 | dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); |
2080 | else { | 2389 | |
2390 | if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) { | ||
2081 | /* | 2391 | /* |
2082 | * First try to allocate an io virtual address in | 2392 | * First try to allocate an io virtual address in |
2083 | * DMA_BIT_MASK(32) and if that fails then try allocating | 2393 | * DMA_BIT_MASK(32) and if that fails then try allocating |
2084 | * from higher range | 2394 | * from higher range |
2085 | */ | 2395 | */ |
2086 | iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32)); | 2396 | iova = alloc_iova(&domain->iovad, nrpages, |
2087 | if (!iova) | 2397 | IOVA_PFN(DMA_BIT_MASK(32)), 1); |
2088 | iova = iommu_alloc_iova(domain, size, dma_mask); | 2398 | if (iova) |
2089 | } | 2399 | return iova; |
2090 | 2400 | } | |
2091 | if (!iova) { | 2401 | iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1); |
2092 | printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); | 2402 | if (unlikely(!iova)) { |
2403 | printk(KERN_ERR "Allocating %ld-page iova for %s failed", | ||
2404 | nrpages, pci_name(pdev)); | ||
2093 | return NULL; | 2405 | return NULL; |
2094 | } | 2406 | } |
2095 | 2407 | ||
@@ -2112,7 +2424,8 @@ get_valid_domain_for_dev(struct pci_dev *pdev) | |||
2112 | 2424 | ||
2113 | /* make sure context mapping is ok */ | 2425 | /* make sure context mapping is ok */ |
2114 | if (unlikely(!domain_context_mapped(pdev))) { | 2426 | if (unlikely(!domain_context_mapped(pdev))) { |
2115 | ret = domain_context_mapping(domain, pdev); | 2427 | ret = domain_context_mapping(domain, pdev, |
2428 | CONTEXT_TT_MULTI_LEVEL); | ||
2116 | if (ret) { | 2429 | if (ret) { |
2117 | printk(KERN_ERR | 2430 | printk(KERN_ERR |
2118 | "Domain context map for %s failed", | 2431 | "Domain context map for %s failed", |
@@ -2124,6 +2437,52 @@ get_valid_domain_for_dev(struct pci_dev *pdev) | |||
2124 | return domain; | 2437 | return domain; |
2125 | } | 2438 | } |
2126 | 2439 | ||
2440 | static int iommu_dummy(struct pci_dev *pdev) | ||
2441 | { | ||
2442 | return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; | ||
2443 | } | ||
2444 | |||
2445 | /* Check if the pdev needs to go through the non-identity map and unmap process. */ | ||
2446 | static int iommu_no_mapping(struct pci_dev *pdev) | ||
2447 | { | ||
2448 | int found; | ||
2449 | |||
2450 | if (!iommu_identity_mapping) | ||
2451 | return iommu_dummy(pdev); | ||
2452 | |||
2453 | found = identity_mapping(pdev); | ||
2454 | if (found) { | ||
2455 | if (pdev->dma_mask > DMA_BIT_MASK(32)) | ||
2456 | return 1; | ||
2457 | else { | ||
2458 | /* | ||
2459 | * A 32-bit DMA device is removed from si_domain and falls back | ||
2460 | * to non-identity mapping. | ||
2461 | */ | ||
2462 | domain_remove_one_dev_info(si_domain, pdev); | ||
2463 | printk(KERN_INFO "32bit %s uses non-identity mapping\n", | ||
2464 | pci_name(pdev)); | ||
2465 | return 0; | ||
2466 | } | ||
2467 | } else { | ||
2468 | /* | ||
2469 | * When a 64-bit DMA device is detached from a VM, the device | ||
2470 | * is put into si_domain for identity mapping. | ||
2471 | */ | ||
2472 | if (pdev->dma_mask > DMA_BIT_MASK(32)) { | ||
2473 | int ret; | ||
2474 | ret = domain_add_dev_info(si_domain, pdev); | ||
2475 | if (!ret) { | ||
2476 | printk(KERN_INFO "64bit %s uses identity mapping\n", | ||
2477 | pci_name(pdev)); | ||
2478 | return 1; | ||
2479 | } | ||
2480 | } | ||
2481 | } | ||
2482 | |||
2483 | return iommu_dummy(pdev); | ||
2484 | } | ||
2485 | |||
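The policy implemented by iommu_no_mapping() above can be summarised as follows (a paraphrase of the code, not part of the patch):

/*
 * !iommu_identity_mapping          -> translate normally (unless dummy device)
 * in si_domain && 64-bit capable   -> keep identity mapping, return 1 (bypass)
 * in si_domain && 32-bit only      -> drop from si_domain, translate normally
 * not in si_domain && 64-bit able  -> add to si_domain, return 1 (bypass)
 * otherwise                        -> fall back to the dummy-device check
 */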
2127 | static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, | 2486 | static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, |
2128 | size_t size, int dir, u64 dma_mask) | 2487 | size_t size, int dir, u64 dma_mask) |
2129 | { | 2488 | { |
@@ -2136,7 +2495,8 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, | |||
2136 | struct intel_iommu *iommu; | 2495 | struct intel_iommu *iommu; |
2137 | 2496 | ||
2138 | BUG_ON(dir == DMA_NONE); | 2497 | BUG_ON(dir == DMA_NONE); |
2139 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | 2498 | |
2499 | if (iommu_no_mapping(pdev)) | ||
2140 | return paddr; | 2500 | return paddr; |
2141 | 2501 | ||
2142 | domain = get_valid_domain_for_dev(pdev); | 2502 | domain = get_valid_domain_for_dev(pdev); |
@@ -2144,14 +2504,12 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, | |||
2144 | return 0; | 2504 | return 0; |
2145 | 2505 | ||
2146 | iommu = domain_get_iommu(domain); | 2506 | iommu = domain_get_iommu(domain); |
2147 | size = aligned_size((u64)paddr, size); | 2507 | size = aligned_nrpages(paddr, size); |
2148 | 2508 | ||
2149 | iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); | 2509 | iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); |
2150 | if (!iova) | 2510 | if (!iova) |
2151 | goto error; | 2511 | goto error; |
2152 | 2512 | ||
2153 | start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; | ||
2154 | |||
2155 | /* | 2513 | /* |
2156 | * Check if DMAR supports zero-length reads on write only | 2514 | * Check if DMAR supports zero-length reads on write only |
2157 | * mappings.. | 2515 | * mappings.. |
@@ -2167,19 +2525,20 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, | |||
2167 | * might have two guest_addrs mapping to the same host paddr, but this | 2525 | * might have two guest_addrs mapping to the same host paddr, but this |
2168 | * is not a big problem | 2526 | * is not a big problem |
2169 | */ | 2527 | */ |
2170 | ret = domain_page_mapping(domain, start_paddr, | 2528 | ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo), |
2171 | ((u64)paddr) & PHYSICAL_PAGE_MASK, | 2529 | paddr >> VTD_PAGE_SHIFT, size, prot); |
2172 | size, prot); | ||
2173 | if (ret) | 2530 | if (ret) |
2174 | goto error; | 2531 | goto error; |
2175 | 2532 | ||
2176 | /* it's a non-present to present mapping */ | 2533 | /* it's a non-present to present mapping. Only flush if caching mode */ |
2177 | ret = iommu_flush_iotlb_psi(iommu, domain->id, | 2534 | if (cap_caching_mode(iommu->cap)) |
2178 | start_paddr, size >> VTD_PAGE_SHIFT, 1); | 2535 | iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size); |
2179 | if (ret) | 2536 | else |
2180 | iommu_flush_write_buffer(iommu); | 2537 | iommu_flush_write_buffer(iommu); |
2181 | 2538 | ||
2182 | return start_paddr + ((u64)paddr & (~PAGE_MASK)); | 2539 | start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; |
2540 | start_paddr += paddr & ~PAGE_MASK; | ||
2541 | return start_paddr; | ||
2183 | 2542 | ||
2184 | error: | 2543 | error: |
2185 | if (iova) | 2544 | if (iova) |
@@ -2210,15 +2569,22 @@ static void flush_unmaps(void) | |||
2210 | if (!iommu) | 2569 | if (!iommu) |
2211 | continue; | 2570 | continue; |
2212 | 2571 | ||
2213 | if (deferred_flush[i].next) { | 2572 | if (!deferred_flush[i].next) |
2214 | iommu->flush.flush_iotlb(iommu, 0, 0, 0, | 2573 | continue; |
2215 | DMA_TLB_GLOBAL_FLUSH, 0); | 2574 | |
2216 | for (j = 0; j < deferred_flush[i].next; j++) { | 2575 | iommu->flush.flush_iotlb(iommu, 0, 0, 0, |
2217 | __free_iova(&deferred_flush[i].domain[j]->iovad, | 2576 | DMA_TLB_GLOBAL_FLUSH); |
2218 | deferred_flush[i].iova[j]); | 2577 | for (j = 0; j < deferred_flush[i].next; j++) { |
2219 | } | 2578 | unsigned long mask; |
2220 | deferred_flush[i].next = 0; | 2579 | struct iova *iova = deferred_flush[i].iova[j]; |
2580 | |||
2581 | mask = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT; | ||
2582 | mask = ilog2(mask >> VTD_PAGE_SHIFT); | ||
2583 | iommu_flush_dev_iotlb(deferred_flush[i].domain[j], | ||
2584 | iova->pfn_lo << PAGE_SHIFT, mask); | ||
2585 | __free_iova(&deferred_flush[i].domain[j]->iovad, iova); | ||
2221 | } | 2586 | } |
2587 | deferred_flush[i].next = 0; | ||
2222 | } | 2588 | } |
2223 | 2589 | ||
2224 | list_size = 0; | 2590 | list_size = 0; |
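An illustrative instance of the mask computation in flush_unmaps() above, assuming PAGE_SHIFT == VTD_PAGE_SHIFT == 12; the pfn values are made up:

/* An iova with pfn_lo = 0x100 and pfn_hi = 0x107 spans 8 mm pages, so: */
unsigned long mask = (0x107 - 0x100 + 1) << PAGE_SHIFT;    /* 0x8000 bytes  */
mask = ilog2(mask >> VTD_PAGE_SHIFT);                      /* ilog2(8) == 3 */
/* The device-IOTLB invalidation then covers 2^3 = 8 pages starting at
   the address 0x100 << PAGE_SHIFT. */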
@@ -2265,35 +2631,38 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, | |||
2265 | { | 2631 | { |
2266 | struct pci_dev *pdev = to_pci_dev(dev); | 2632 | struct pci_dev *pdev = to_pci_dev(dev); |
2267 | struct dmar_domain *domain; | 2633 | struct dmar_domain *domain; |
2268 | unsigned long start_addr; | 2634 | unsigned long start_pfn, last_pfn; |
2269 | struct iova *iova; | 2635 | struct iova *iova; |
2270 | struct intel_iommu *iommu; | 2636 | struct intel_iommu *iommu; |
2271 | 2637 | ||
2272 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | 2638 | if (iommu_no_mapping(pdev)) |
2273 | return; | 2639 | return; |
2640 | |||
2274 | domain = find_domain(pdev); | 2641 | domain = find_domain(pdev); |
2275 | BUG_ON(!domain); | 2642 | BUG_ON(!domain); |
2276 | 2643 | ||
2277 | iommu = domain_get_iommu(domain); | 2644 | iommu = domain_get_iommu(domain); |
2278 | 2645 | ||
2279 | iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); | 2646 | iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); |
2280 | if (!iova) | 2647 | if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n", |
2648 | (unsigned long long)dev_addr)) | ||
2281 | return; | 2649 | return; |
2282 | 2650 | ||
2283 | start_addr = iova->pfn_lo << PAGE_SHIFT; | 2651 | start_pfn = mm_to_dma_pfn(iova->pfn_lo); |
2284 | size = aligned_size((u64)dev_addr, size); | 2652 | last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; |
2285 | 2653 | ||
2286 | pr_debug("Device %s unmapping: %zx@%llx\n", | 2654 | pr_debug("Device %s unmapping: pfn %lx-%lx\n", |
2287 | pci_name(pdev), size, (unsigned long long)start_addr); | 2655 | pci_name(pdev), start_pfn, last_pfn); |
2288 | 2656 | ||
2289 | /* clear the whole page */ | 2657 | /* clear the whole page */ |
2290 | dma_pte_clear_range(domain, start_addr, start_addr + size); | 2658 | dma_pte_clear_range(domain, start_pfn, last_pfn); |
2659 | |||
2291 | /* free page tables */ | 2660 | /* free page tables */ |
2292 | dma_pte_free_pagetable(domain, start_addr, start_addr + size); | 2661 | dma_pte_free_pagetable(domain, start_pfn, last_pfn); |
2662 | |||
2293 | if (intel_iommu_strict) { | 2663 | if (intel_iommu_strict) { |
2294 | if (iommu_flush_iotlb_psi(iommu, | 2664 | iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, |
2295 | domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) | 2665 | last_pfn - start_pfn + 1); |
2296 | iommu_flush_write_buffer(iommu); | ||
2297 | /* free iova */ | 2666 | /* free iova */ |
2298 | __free_iova(&domain->iovad, iova); | 2667 | __free_iova(&domain->iovad, iova); |
2299 | } else { | 2668 | } else { |
@@ -2351,17 +2720,13 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, | |||
2351 | int nelems, enum dma_data_direction dir, | 2720 | int nelems, enum dma_data_direction dir, |
2352 | struct dma_attrs *attrs) | 2721 | struct dma_attrs *attrs) |
2353 | { | 2722 | { |
2354 | int i; | ||
2355 | struct pci_dev *pdev = to_pci_dev(hwdev); | 2723 | struct pci_dev *pdev = to_pci_dev(hwdev); |
2356 | struct dmar_domain *domain; | 2724 | struct dmar_domain *domain; |
2357 | unsigned long start_addr; | 2725 | unsigned long start_pfn, last_pfn; |
2358 | struct iova *iova; | 2726 | struct iova *iova; |
2359 | size_t size = 0; | ||
2360 | phys_addr_t addr; | ||
2361 | struct scatterlist *sg; | ||
2362 | struct intel_iommu *iommu; | 2727 | struct intel_iommu *iommu; |
2363 | 2728 | ||
2364 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | 2729 | if (iommu_no_mapping(pdev)) |
2365 | return; | 2730 | return; |
2366 | 2731 | ||
2367 | domain = find_domain(pdev); | 2732 | domain = find_domain(pdev); |
@@ -2370,23 +2735,21 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, | |||
2370 | iommu = domain_get_iommu(domain); | 2735 | iommu = domain_get_iommu(domain); |
2371 | 2736 | ||
2372 | iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); | 2737 | iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); |
2373 | if (!iova) | 2738 | if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n", |
2739 | (unsigned long long)sglist[0].dma_address)) | ||
2374 | return; | 2740 | return; |
2375 | for_each_sg(sglist, sg, nelems, i) { | ||
2376 | addr = page_to_phys(sg_page(sg)) + sg->offset; | ||
2377 | size += aligned_size((u64)addr, sg->length); | ||
2378 | } | ||
2379 | 2741 | ||
2380 | start_addr = iova->pfn_lo << PAGE_SHIFT; | 2742 | start_pfn = mm_to_dma_pfn(iova->pfn_lo); |
2743 | last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; | ||
2381 | 2744 | ||
2382 | /* clear the whole page */ | 2745 | /* clear the whole page */ |
2383 | dma_pte_clear_range(domain, start_addr, start_addr + size); | 2746 | dma_pte_clear_range(domain, start_pfn, last_pfn); |
2747 | |||
2384 | /* free page tables */ | 2748 | /* free page tables */ |
2385 | dma_pte_free_pagetable(domain, start_addr, start_addr + size); | 2749 | dma_pte_free_pagetable(domain, start_pfn, last_pfn); |
2386 | 2750 | ||
2387 | if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr, | 2751 | iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, |
2388 | size >> VTD_PAGE_SHIFT, 0)) | 2752 | (last_pfn - start_pfn + 1)); |
2389 | iommu_flush_write_buffer(iommu); | ||
2390 | 2753 | ||
2391 | /* free iova */ | 2754 | /* free iova */ |
2392 | __free_iova(&domain->iovad, iova); | 2755 | __free_iova(&domain->iovad, iova); |
@@ -2409,21 +2772,20 @@ static int intel_nontranslate_map_sg(struct device *hddev, | |||
2409 | static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, | 2772 | static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, |
2410 | enum dma_data_direction dir, struct dma_attrs *attrs) | 2773 | enum dma_data_direction dir, struct dma_attrs *attrs) |
2411 | { | 2774 | { |
2412 | phys_addr_t addr; | ||
2413 | int i; | 2775 | int i; |
2414 | struct pci_dev *pdev = to_pci_dev(hwdev); | 2776 | struct pci_dev *pdev = to_pci_dev(hwdev); |
2415 | struct dmar_domain *domain; | 2777 | struct dmar_domain *domain; |
2416 | size_t size = 0; | 2778 | size_t size = 0; |
2417 | int prot = 0; | 2779 | int prot = 0; |
2418 | size_t offset = 0; | 2780 | size_t offset_pfn = 0; |
2419 | struct iova *iova = NULL; | 2781 | struct iova *iova = NULL; |
2420 | int ret; | 2782 | int ret; |
2421 | struct scatterlist *sg; | 2783 | struct scatterlist *sg; |
2422 | unsigned long start_addr; | 2784 | unsigned long start_vpfn; |
2423 | struct intel_iommu *iommu; | 2785 | struct intel_iommu *iommu; |
2424 | 2786 | ||
2425 | BUG_ON(dir == DMA_NONE); | 2787 | BUG_ON(dir == DMA_NONE); |
2426 | if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) | 2788 | if (iommu_no_mapping(pdev)) |
2427 | return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); | 2789 | return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); |
2428 | 2790 | ||
2429 | domain = get_valid_domain_for_dev(pdev); | 2791 | domain = get_valid_domain_for_dev(pdev); |
@@ -2432,12 +2794,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne | |||
2432 | 2794 | ||
2433 | iommu = domain_get_iommu(domain); | 2795 | iommu = domain_get_iommu(domain); |
2434 | 2796 | ||
2435 | for_each_sg(sglist, sg, nelems, i) { | 2797 | for_each_sg(sglist, sg, nelems, i) |
2436 | addr = page_to_phys(sg_page(sg)) + sg->offset; | 2798 | size += aligned_nrpages(sg->offset, sg->length); |
2437 | size += aligned_size((u64)addr, sg->length); | ||
2438 | } | ||
2439 | 2799 | ||
2440 | iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); | 2800 | iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); |
2441 | if (!iova) { | 2801 | if (!iova) { |
2442 | sglist->dma_length = 0; | 2802 | sglist->dma_length = 0; |
2443 | return 0; | 2803 | return 0; |
@@ -2453,35 +2813,27 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne | |||
2453 | if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) | 2813 | if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) |
2454 | prot |= DMA_PTE_WRITE; | 2814 | prot |= DMA_PTE_WRITE; |
2455 | 2815 | ||
2456 | start_addr = iova->pfn_lo << PAGE_SHIFT; | 2816 | start_vpfn = mm_to_dma_pfn(iova->pfn_lo); |
2457 | offset = 0; | 2817 | |
2458 | for_each_sg(sglist, sg, nelems, i) { | 2818 | ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot); |
2459 | addr = page_to_phys(sg_page(sg)) + sg->offset; | 2819 | if (unlikely(ret)) { |
2460 | size = aligned_size((u64)addr, sg->length); | 2820 | /* clear the page */ |
2461 | ret = domain_page_mapping(domain, start_addr + offset, | 2821 | dma_pte_clear_range(domain, start_vpfn, |
2462 | ((u64)addr) & PHYSICAL_PAGE_MASK, | 2822 | start_vpfn + size - 1); |
2463 | size, prot); | 2823 | /* free page tables */ |
2464 | if (ret) { | 2824 | dma_pte_free_pagetable(domain, start_vpfn, |
2465 | /* clear the page */ | 2825 | start_vpfn + size - 1); |
2466 | dma_pte_clear_range(domain, start_addr, | 2826 | /* free iova */ |
2467 | start_addr + offset); | 2827 | __free_iova(&domain->iovad, iova); |
2468 | /* free page tables */ | 2828 | return 0; |
2469 | dma_pte_free_pagetable(domain, start_addr, | ||
2470 | start_addr + offset); | ||
2471 | /* free iova */ | ||
2472 | __free_iova(&domain->iovad, iova); | ||
2473 | return 0; | ||
2474 | } | ||
2475 | sg->dma_address = start_addr + offset + | ||
2476 | ((u64)addr & (~PAGE_MASK)); | ||
2477 | sg->dma_length = sg->length; | ||
2478 | offset += size; | ||
2479 | } | 2829 | } |
2480 | 2830 | ||
2481 | /* it's a non-present to present mapping */ | 2831 | /* it's a non-present to present mapping. Only flush if caching mode */ |
2482 | if (iommu_flush_iotlb_psi(iommu, domain->id, | 2832 | if (cap_caching_mode(iommu->cap)) |
2483 | start_addr, offset >> VTD_PAGE_SHIFT, 1)) | 2833 | iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn); |
2834 | else | ||
2484 | iommu_flush_write_buffer(iommu); | 2835 | iommu_flush_write_buffer(iommu); |
2836 | |||
2485 | return nelems; | 2837 | return nelems; |
2486 | } | 2838 | } |
2487 | 2839 | ||
@@ -2640,9 +2992,9 @@ static int init_iommu_hw(void) | |||
2640 | iommu_set_root_entry(iommu); | 2992 | iommu_set_root_entry(iommu); |
2641 | 2993 | ||
2642 | iommu->flush.flush_context(iommu, 0, 0, 0, | 2994 | iommu->flush.flush_context(iommu, 0, 0, 0, |
2643 | DMA_CCMD_GLOBAL_INVL, 0); | 2995 | DMA_CCMD_GLOBAL_INVL); |
2644 | iommu->flush.flush_iotlb(iommu, 0, 0, 0, | 2996 | iommu->flush.flush_iotlb(iommu, 0, 0, 0, |
2645 | DMA_TLB_GLOBAL_FLUSH, 0); | 2997 | DMA_TLB_GLOBAL_FLUSH); |
2646 | iommu_disable_protect_mem_regions(iommu); | 2998 | iommu_disable_protect_mem_regions(iommu); |
2647 | iommu_enable_translation(iommu); | 2999 | iommu_enable_translation(iommu); |
2648 | } | 3000 | } |
@@ -2657,9 +3009,9 @@ static void iommu_flush_all(void) | |||
2657 | 3009 | ||
2658 | for_each_active_iommu(iommu, drhd) { | 3010 | for_each_active_iommu(iommu, drhd) { |
2659 | iommu->flush.flush_context(iommu, 0, 0, 0, | 3011 | iommu->flush.flush_context(iommu, 0, 0, 0, |
2660 | DMA_CCMD_GLOBAL_INVL, 0); | 3012 | DMA_CCMD_GLOBAL_INVL); |
2661 | iommu->flush.flush_iotlb(iommu, 0, 0, 0, | 3013 | iommu->flush.flush_iotlb(iommu, 0, 0, 0, |
2662 | DMA_TLB_GLOBAL_FLUSH, 0); | 3014 | DMA_TLB_GLOBAL_FLUSH); |
2663 | } | 3015 | } |
2664 | } | 3016 | } |
2665 | 3017 | ||
@@ -2782,7 +3134,7 @@ int __init intel_iommu_init(void) | |||
2782 | * Check the need for DMA-remapping initialization now. | 3134 | * Check the need for DMA-remapping initialization now. |
2783 | * Above initialization will also be used by Interrupt-remapping. | 3135 | * Above initialization will also be used by Interrupt-remapping. |
2784 | */ | 3136 | */ |
2785 | if (no_iommu || swiotlb || dmar_disabled) | 3137 | if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled) |
2786 | return -ENODEV; | 3138 | return -ENODEV; |
2787 | 3139 | ||
2788 | iommu_init_mempool(); | 3140 | iommu_init_mempool(); |
@@ -2802,35 +3154,18 @@ int __init intel_iommu_init(void) | |||
2802 | 3154 | ||
2803 | init_timer(&unmap_timer); | 3155 | init_timer(&unmap_timer); |
2804 | force_iommu = 1; | 3156 | force_iommu = 1; |
2805 | dma_ops = &intel_dma_ops; | ||
2806 | init_iommu_sysfs(); | ||
2807 | |||
2808 | register_iommu(&intel_iommu_ops); | ||
2809 | 3157 | ||
2810 | return 0; | 3158 | if (!iommu_pass_through) { |
2811 | } | 3159 | printk(KERN_INFO |
3160 | "Multi-level page-table translation for DMAR.\n"); | ||
3161 | dma_ops = &intel_dma_ops; | ||
3162 | } else | ||
3163 | printk(KERN_INFO | ||
3164 | "DMAR: Pass through translation for DMAR.\n"); | ||
2812 | 3165 | ||
2813 | static int vm_domain_add_dev_info(struct dmar_domain *domain, | 3166 | init_iommu_sysfs(); |
2814 | struct pci_dev *pdev) | ||
2815 | { | ||
2816 | struct device_domain_info *info; | ||
2817 | unsigned long flags; | ||
2818 | |||
2819 | info = alloc_devinfo_mem(); | ||
2820 | if (!info) | ||
2821 | return -ENOMEM; | ||
2822 | |||
2823 | info->segment = pci_domain_nr(pdev->bus); | ||
2824 | info->bus = pdev->bus->number; | ||
2825 | info->devfn = pdev->devfn; | ||
2826 | info->dev = pdev; | ||
2827 | info->domain = domain; | ||
2828 | 3167 | ||
2829 | spin_lock_irqsave(&device_domain_lock, flags); | 3168 | register_iommu(&intel_iommu_ops); |
2830 | list_add(&info->link, &domain->devices); | ||
2831 | list_add(&info->global, &device_domain_list); | ||
2832 | pdev->dev.archdata.iommu = info; | ||
2833 | spin_unlock_irqrestore(&device_domain_lock, flags); | ||
2834 | 3169 | ||
2835 | return 0; | 3170 | return 0; |
2836 | } | 3171 | } |
@@ -2862,7 +3197,7 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu, | |||
2862 | } | 3197 | } |
2863 | } | 3198 | } |
2864 | 3199 | ||
2865 | static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, | 3200 | static void domain_remove_one_dev_info(struct dmar_domain *domain, |
2866 | struct pci_dev *pdev) | 3201 | struct pci_dev *pdev) |
2867 | { | 3202 | { |
2868 | struct device_domain_info *info; | 3203 | struct device_domain_info *info; |
@@ -2888,6 +3223,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, | |||
2888 | info->dev->dev.archdata.iommu = NULL; | 3223 | info->dev->dev.archdata.iommu = NULL; |
2889 | spin_unlock_irqrestore(&device_domain_lock, flags); | 3224 | spin_unlock_irqrestore(&device_domain_lock, flags); |
2890 | 3225 | ||
3226 | iommu_disable_dev_iotlb(info); | ||
2891 | iommu_detach_dev(iommu, info->bus, info->devfn); | 3227 | iommu_detach_dev(iommu, info->bus, info->devfn); |
2892 | iommu_detach_dependent_devices(iommu, pdev); | 3228 | iommu_detach_dependent_devices(iommu, pdev); |
2893 | free_devinfo_mem(info); | 3229 | free_devinfo_mem(info); |
@@ -2938,6 +3274,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) | |||
2938 | 3274 | ||
2939 | spin_unlock_irqrestore(&device_domain_lock, flags1); | 3275 | spin_unlock_irqrestore(&device_domain_lock, flags1); |
2940 | 3276 | ||
3277 | iommu_disable_dev_iotlb(info); | ||
2941 | iommu = device_to_iommu(info->segment, info->bus, info->devfn); | 3278 | iommu = device_to_iommu(info->segment, info->bus, info->devfn); |
2942 | iommu_detach_dev(iommu, info->bus, info->devfn); | 3279 | iommu_detach_dev(iommu, info->bus, info->devfn); |
2943 | iommu_detach_dependent_devices(iommu, info->dev); | 3280 | iommu_detach_dependent_devices(iommu, info->dev); |
@@ -2993,12 +3330,11 @@ static struct dmar_domain *iommu_alloc_vm_domain(void) | |||
2993 | return domain; | 3330 | return domain; |
2994 | } | 3331 | } |
2995 | 3332 | ||
2996 | static int vm_domain_init(struct dmar_domain *domain, int guest_width) | 3333 | static int md_domain_init(struct dmar_domain *domain, int guest_width) |
2997 | { | 3334 | { |
2998 | int adjust_width; | 3335 | int adjust_width; |
2999 | 3336 | ||
3000 | init_iova_domain(&domain->iovad, DMA_32BIT_PFN); | 3337 | init_iova_domain(&domain->iovad, DMA_32BIT_PFN); |
3001 | spin_lock_init(&domain->mapping_lock); | ||
3002 | spin_lock_init(&domain->iommu_lock); | 3338 | spin_lock_init(&domain->iommu_lock); |
3003 | 3339 | ||
3004 | domain_reserve_special_ranges(domain); | 3340 | domain_reserve_special_ranges(domain); |
@@ -3052,8 +3388,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain) | |||
3052 | 3388 | ||
3053 | static void vm_domain_exit(struct dmar_domain *domain) | 3389 | static void vm_domain_exit(struct dmar_domain *domain) |
3054 | { | 3390 | { |
3055 | u64 end; | ||
3056 | |||
3057 | /* Domain 0 is reserved, so dont process it */ | 3391 | /* Domain 0 is reserved, so dont process it */ |
3058 | if (!domain) | 3392 | if (!domain) |
3059 | return; | 3393 | return; |
@@ -3061,14 +3395,12 @@ static void vm_domain_exit(struct dmar_domain *domain) | |||
3061 | vm_domain_remove_all_dev_info(domain); | 3395 | vm_domain_remove_all_dev_info(domain); |
3062 | /* destroy iovas */ | 3396 | /* destroy iovas */ |
3063 | put_iova_domain(&domain->iovad); | 3397 | put_iova_domain(&domain->iovad); |
3064 | end = DOMAIN_MAX_ADDR(domain->gaw); | ||
3065 | end = end & (~VTD_PAGE_MASK); | ||
3066 | 3398 | ||
3067 | /* clear ptes */ | 3399 | /* clear ptes */ |
3068 | dma_pte_clear_range(domain, 0, end); | 3400 | dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); |
3069 | 3401 | ||
3070 | /* free page tables */ | 3402 | /* free page tables */ |
3071 | dma_pte_free_pagetable(domain, 0, end); | 3403 | dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); |
3072 | 3404 | ||
3073 | iommu_free_vm_domain(domain); | 3405 | iommu_free_vm_domain(domain); |
3074 | free_domain_mem(domain); | 3406 | free_domain_mem(domain); |
@@ -3084,7 +3416,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain) | |||
3084 | "intel_iommu_domain_init: dmar_domain == NULL\n"); | 3416 | "intel_iommu_domain_init: dmar_domain == NULL\n"); |
3085 | return -ENOMEM; | 3417 | return -ENOMEM; |
3086 | } | 3418 | } |
3087 | if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { | 3419 | if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { |
3088 | printk(KERN_ERR | 3420 | printk(KERN_ERR |
3089 | "intel_iommu_domain_init() failed\n"); | 3421 | "intel_iommu_domain_init() failed\n"); |
3090 | vm_domain_exit(dmar_domain); | 3422 | vm_domain_exit(dmar_domain); |
@@ -3119,8 +3451,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, | |||
3119 | 3451 | ||
3120 | old_domain = find_domain(pdev); | 3452 | old_domain = find_domain(pdev); |
3121 | if (old_domain) { | 3453 | if (old_domain) { |
3122 | if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) | 3454 | if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || |
3123 | vm_domain_remove_one_dev_info(old_domain, pdev); | 3455 | dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) |
3456 | domain_remove_one_dev_info(old_domain, pdev); | ||
3124 | else | 3457 | else |
3125 | domain_remove_dev_info(old_domain); | 3458 | domain_remove_dev_info(old_domain); |
3126 | } | 3459 | } |
@@ -3142,11 +3475,11 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, | |||
3142 | return -EFAULT; | 3475 | return -EFAULT; |
3143 | } | 3476 | } |
3144 | 3477 | ||
3145 | ret = domain_context_mapping(dmar_domain, pdev); | 3478 | ret = domain_add_dev_info(dmar_domain, pdev); |
3146 | if (ret) | 3479 | if (ret) |
3147 | return ret; | 3480 | return ret; |
3148 | 3481 | ||
3149 | ret = vm_domain_add_dev_info(dmar_domain, pdev); | 3482 | ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); |
3150 | return ret; | 3483 | return ret; |
3151 | } | 3484 | } |
3152 | 3485 | ||
@@ -3156,7 +3489,7 @@ static void intel_iommu_detach_device(struct iommu_domain *domain, | |||
3156 | struct dmar_domain *dmar_domain = domain->priv; | 3489 | struct dmar_domain *dmar_domain = domain->priv; |
3157 | struct pci_dev *pdev = to_pci_dev(dev); | 3490 | struct pci_dev *pdev = to_pci_dev(dev); |
3158 | 3491 | ||
3159 | vm_domain_remove_one_dev_info(dmar_domain, pdev); | 3492 | domain_remove_one_dev_info(dmar_domain, pdev); |
3160 | } | 3493 | } |
3161 | 3494 | ||
3162 | static int intel_iommu_map_range(struct iommu_domain *domain, | 3495 | static int intel_iommu_map_range(struct iommu_domain *domain, |
@@ -3176,7 +3509,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain, | |||
3176 | if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) | 3509 | if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) |
3177 | prot |= DMA_PTE_SNP; | 3510 | prot |= DMA_PTE_SNP; |
3178 | 3511 | ||
3179 | max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); | 3512 | max_addr = iova + size; |
3180 | if (dmar_domain->max_addr < max_addr) { | 3513 | if (dmar_domain->max_addr < max_addr) { |
3181 | int min_agaw; | 3514 | int min_agaw; |
3182 | u64 end; | 3515 | u64 end; |
@@ -3194,8 +3527,11 @@ static int intel_iommu_map_range(struct iommu_domain *domain, | |||
3194 | } | 3527 | } |
3195 | dmar_domain->max_addr = max_addr; | 3528 | dmar_domain->max_addr = max_addr; |
3196 | } | 3529 | } |
3197 | 3530 | /* Round up size to next multiple of PAGE_SIZE, if it and | |
3198 | ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); | 3531 | the low bits of hpa would take us onto the next page */ |
3532 | size = aligned_nrpages(hpa, size); | ||
3533 | ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, | ||
3534 | hpa >> VTD_PAGE_SHIFT, size, prot); | ||
3199 | return ret; | 3535 | return ret; |
3200 | } | 3536 | } |
3201 | 3537 | ||
@@ -3203,15 +3539,12 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain, | |||
3203 | unsigned long iova, size_t size) | 3539 | unsigned long iova, size_t size) |
3204 | { | 3540 | { |
3205 | struct dmar_domain *dmar_domain = domain->priv; | 3541 | struct dmar_domain *dmar_domain = domain->priv; |
3206 | dma_addr_t base; | ||
3207 | 3542 | ||
3208 | /* The address might not be aligned */ | 3543 | dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, |
3209 | base = iova & VTD_PAGE_MASK; | 3544 | (iova + size - 1) >> VTD_PAGE_SHIFT); |
3210 | size = VTD_PAGE_ALIGN(size); | ||
3211 | dma_pte_clear_range(dmar_domain, base, base + size); | ||
3212 | 3545 | ||
3213 | if (dmar_domain->max_addr == base + size) | 3546 | if (dmar_domain->max_addr == iova + size) |
3214 | dmar_domain->max_addr = base; | 3547 | dmar_domain->max_addr = iova; |
3215 | } | 3548 | } |
3216 | 3549 | ||
3217 | static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, | 3550 | static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, |
@@ -3221,7 +3554,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, | |||
3221 | struct dma_pte *pte; | 3554 | struct dma_pte *pte; |
3222 | u64 phys = 0; | 3555 | u64 phys = 0; |
3223 | 3556 | ||
3224 | pte = addr_to_dma_pte(dmar_domain, iova); | 3557 | pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT); |
3225 | if (pte) | 3558 | if (pte) |
3226 | phys = dma_pte_addr(pte); | 3559 | phys = dma_pte_addr(pte); |
3227 | 3560 | ||