Diffstat (limited to 'drivers/pci/intel-iommu.c')
 drivers/pci/intel-iommu.c | 243
 1 file changed, 203 insertions(+), 40 deletions(-)
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index d552d2c77844..f02c34d26d1b 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -39,6 +39,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/tboot.h>
 #include <linux/dmi.h>
+#include <linux/pci-ats.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
 #include "pci.h"
@@ -46,6 +47,8 @@
 #define ROOT_SIZE		VTD_PAGE_SIZE
 #define CONTEXT_SIZE		VTD_PAGE_SIZE
 
+#define IS_BRIDGE_HOST_DEVICE(pdev) \
+			    ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
@@ -115,6 +118,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level)
 	return (pfn + level_size(level) - 1) & level_mask(level);
 }
 
+static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
+{
+	return 1 << ((lvl - 1) * LEVEL_STRIDE);
+}
+
 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
    are never going to work. */
 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
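
[Editor's note] The lvl_to_nr_pages() helper added above converts a page-table level into the number of 4KiB VT-d pages covered by one PTE at that level. A minimal userspace sketch of the arithmetic, assuming LEVEL_STRIDE is 9 (512 entries per table level); the 1UL and the printf scaffolding are illustration only:

#include <stdio.h>

#define LEVEL_STRIDE 9	/* 512 entries per page-table level */

static unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	/* level 1 covers 1 page, level 2 covers 512, level 3 covers 512*512, ... */
	return 1UL << ((lvl - 1) * LEVEL_STRIDE);
}

int main(void)
{
	unsigned int lvl;

	/* prints: 1 page (4 KiB), 512 pages (2 MiB), 262144 pages (1 GiB) */
	for (lvl = 1; lvl <= 3; lvl++)
		printf("level %u -> %lu pages (%lu KiB)\n",
		       lvl, lvl_to_nr_pages(lvl), lvl_to_nr_pages(lvl) * 4);
	return 0;
}
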
@@ -142,6 +150,12 @@ static void __init check_tylersburg_isoch(void);
 static int rwbf_quirk;
 
 /*
+ * set to 1 to panic kernel if can't successfully enable VT-d
+ * (used when kernel is launched w/ TXT)
+ */
+static int force_on = 0;
+
+/*
  * 0: Present
  * 1-11: Reserved
  * 12-63: Context Ptr (12 - (haw-1))
@@ -337,6 +351,9 @@ struct dmar_domain {
 	int		iommu_coherency;/* indicate coherency of iommu access */
 	int		iommu_snooping; /* indicate snooping control feature*/
 	int		iommu_count;	/* reference count of iommu */
+	int		iommu_superpage;/* Level of superpages supported:
+					   0 == 4KiB (no superpages), 1 == 2MiB,
+					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
 	spinlock_t	iommu_lock;	/* protect iommu set in domain */
 	u64		max_addr;	/* maximum mapped address */
 };
@@ -386,6 +403,7 @@ int dmar_disabled = 1;
 static int dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int intel_iommu_strict;
+static int intel_iommu_superpage = 1;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -416,6 +434,10 @@ static int __init intel_iommu_setup(char *str)
 			printk(KERN_INFO
 				"Intel-IOMMU: disable batched IOTLB flush\n");
 			intel_iommu_strict = 1;
+		} else if (!strncmp(str, "sp_off", 6)) {
+			printk(KERN_INFO
+				"Intel-IOMMU: disable supported super page\n");
+			intel_iommu_superpage = 0;
 		}
 
 		str += strcspn(str, ",");
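
[Editor's note] With the parser change above, superpage use can be disabled from the kernel command line. Options to intel_iommu= are comma-separated, so the new flag combines with the existing ones, for example:

	intel_iommu=sp_off
	intel_iommu=on,sp_off,strict
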
@@ -554,11 +576,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain)
 	}
 }
 
+static void domain_update_iommu_superpage(struct dmar_domain *domain)
+{
+	int i, mask = 0xf;
+
+	if (!intel_iommu_superpage) {
+		domain->iommu_superpage = 0;
+		return;
+	}
+
+	domain->iommu_superpage = 4; /* 1TiB */
+
+	for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
+		mask |= cap_super_page_val(g_iommus[i]->cap);
+		if (!mask) {
+			break;
+		}
+	}
+	domain->iommu_superpage = fls(mask);
+}
+
 /* Some capabilities may be different across iommus */
 static void domain_update_iommu_cap(struct dmar_domain *domain)
 {
 	domain_update_iommu_coherency(domain);
 	domain_update_iommu_snooping(domain);
+	domain_update_iommu_superpage(domain);
 }
 
 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
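
[Editor's note] domain_update_iommu_superpage() reduces the hardware superpage capability bits (the 4-bit field returned by cap_super_page_val(), where bit 0 indicates 2MiB support and bit 1 indicates 1GiB support) to the single level encoding stored in domain->iommu_superpage via fls(). A minimal sketch of that mapping, using a local stand-in for the kernel's fls():

#include <stdio.h>

/* Stand-in for the kernel's fls(): highest set bit, 1-based, 0 if no bits set. */
static int fls_stub(unsigned int x)
{
	int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;
}

int main(void)
{
	/* superpage capability bits -> domain->iommu_superpage level */
	printf("0x0 -> level %d (4KiB pages only)\n", fls_stub(0x0));
	printf("0x1 -> level %d (up to 2MiB)\n", fls_stub(0x1));
	printf("0x3 -> level %d (up to 1GiB)\n", fls_stub(0x3));
	return 0;
}
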
@@ -688,23 +731,31 @@ out:
 }
 
 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
-				      unsigned long pfn)
+				      unsigned long pfn, int large_level)
 {
 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 	struct dma_pte *parent, *pte = NULL;
 	int level = agaw_to_level(domain->agaw);
-	int offset;
+	int offset, target_level;
 
 	BUG_ON(!domain->pgd);
 	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
 	parent = domain->pgd;
 
+	/* Search pte */
+	if (!large_level)
+		target_level = 1;
+	else
+		target_level = large_level;
+
 	while (level > 0) {
 		void *tmp_page;
 
 		offset = pfn_level_offset(pfn, level);
 		pte = &parent[offset];
-		if (level == 1)
+		if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
+			break;
+		if (level == target_level)
 			break;
 
 		if (!dma_pte_present(pte)) {
@@ -732,10 +783,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 	return pte;
 }
 
+
 /* return address's pte at specific level */
 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
 					 unsigned long pfn,
-					 int level)
+					 int level, int *large_page)
 {
 	struct dma_pte *parent, *pte = NULL;
 	int total = agaw_to_level(domain->agaw);
@@ -748,8 +800,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
 		if (level == total)
 			return pte;
 
-		if (!dma_pte_present(pte))
+		if (!dma_pte_present(pte)) {
+			*large_page = total;
 			break;
+		}
+
+		if (pte->val & DMA_PTE_LARGE_PAGE) {
+			*large_page = total;
+			return pte;
+		}
+
 		parent = phys_to_virt(dma_pte_addr(pte));
 		total--;
 	}
@@ -762,6 +822,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
 			      unsigned long last_pfn)
 {
 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+	unsigned int large_page = 1;
 	struct dma_pte *first_pte, *pte;
 
 	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
@@ -770,14 +831,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
 
 	/* we don't need lock here; nobody else touches the iova range */
 	do {
-		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
+		large_page = 1;
+		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
 		if (!pte) {
-			start_pfn = align_to_level(start_pfn + 1, 2);
+			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
 			continue;
 		}
 		do {
 			dma_clear_pte(pte);
-			start_pfn++;
+			start_pfn += lvl_to_nr_pages(large_page);
 			pte++;
 		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
 
@@ -797,6 +859,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
 	int total = agaw_to_level(domain->agaw);
 	int level;
 	unsigned long tmp;
+	int large_page = 2;
 
 	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
 	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
@@ -812,7 +875,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
 		return;
 
 	do {
-		first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
+		large_page = level;
+		first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
+		if (large_page > level)
+			level = large_page + 1;
 		if (!pte) {
 			tmp = align_to_level(tmp + 1, level + 1);
 			continue;
@@ -1396,6 +1462,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 	else
 		domain->iommu_snooping = 0;
 
+	domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
 	domain->iommu_count = 1;
 	domain->nid = iommu->node;
 
@@ -1416,6 +1483,10 @@ static void domain_exit(struct dmar_domain *domain)
 	if (!domain)
 		return;
 
+	/* Flush any lazy unmaps that may reference this domain */
+	if (!intel_iommu_strict)
+		flush_unmaps_timeout(0);
+
 	domain_remove_dev_info(domain);
 	/* destroy iovas */
 	put_iova_domain(&domain->iovad);
@@ -1647,6 +1718,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr,
 	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
 }
 
+/* Return largest possible superpage level for a given mapping */
+static inline int hardware_largepage_caps(struct dmar_domain *domain,
+					  unsigned long iov_pfn,
+					  unsigned long phy_pfn,
+					  unsigned long pages)
+{
+	int support, level = 1;
+	unsigned long pfnmerge;
+
+	support = domain->iommu_superpage;
+
+	/* To use a large page, the virtual *and* physical addresses
+	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
+	   of them will mean we have to use smaller pages. So just
+	   merge them and check both at once. */
+	pfnmerge = iov_pfn | phy_pfn;
+
+	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
+		pages >>= VTD_STRIDE_SHIFT;
+		if (!pages)
+			break;
+		pfnmerge >>= VTD_STRIDE_SHIFT;
+		level++;
+		support--;
+	}
+	return level;
+}
+
 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			    struct scatterlist *sg, unsigned long phys_pfn,
 			    unsigned long nr_pages, int prot)
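
[Editor's note] The comment inside hardware_largepage_caps() states the key constraint: a level-N superpage can only be used when the IOVA pfn and the physical pfn share the required alignment and enough pages remain. A standalone sketch of the same walk, assuming VTD_STRIDE_SHIFT is 9 and passing the domain's supported level in as a plain parameter rather than reading domain->iommu_superpage:

#include <stdio.h>

#define VTD_STRIDE_SHIFT 9	/* 512 entries per page-table level */
#define VTD_STRIDE_MASK (((unsigned long)-1) << VTD_STRIDE_SHIFT)

static int largepage_level(int support, unsigned long iov_pfn,
			   unsigned long phy_pfn, unsigned long pages)
{
	unsigned long pfnmerge = iov_pfn | phy_pfn;	/* both must be aligned */
	int level = 1;

	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
		pages >>= VTD_STRIDE_SHIFT;
		if (!pages)
			break;
		pfnmerge >>= VTD_STRIDE_SHIFT;
		level++;
		support--;
	}
	return level;
}

int main(void)
{
	/* 2MiB-aligned IOVA and physical pfns, 512 pages available: level 2 */
	printf("%d\n", largepage_level(1, 0x200, 0x400, 512));
	/* Misaligned physical pfn forces 4KiB mappings: level 1 */
	printf("%d\n", largepage_level(1, 0x200, 0x401, 512));
	return 0;
}
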
@@ -1655,6 +1754,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 	phys_addr_t uninitialized_var(pteval);
 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 	unsigned long sg_res;
+	unsigned int largepage_lvl = 0;
+	unsigned long lvl_pages = 0;
 
 	BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
 
@@ -1670,7 +1771,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
 	}
 
-	while (nr_pages--) {
+	while (nr_pages > 0) {
 		uint64_t tmp;
 
 		if (!sg_res) {
@@ -1678,11 +1779,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
 			sg->dma_length = sg->length;
 			pteval = page_to_phys(sg_page(sg)) | prot;
+			phys_pfn = pteval >> VTD_PAGE_SHIFT;
 		}
+
 		if (!pte) {
-			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
+			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
+
+			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
 			if (!pte)
 				return -ENOMEM;
+			/* It is large page*/
+			if (largepage_lvl > 1)
+				pteval |= DMA_PTE_LARGE_PAGE;
+			else
+				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
+
 		}
 		/* We don't need lock here, nobody else
 		 * touches the iova range
@@ -1698,16 +1809,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			}
 			WARN_ON(1);
 		}
+
+		lvl_pages = lvl_to_nr_pages(largepage_lvl);
+
+		BUG_ON(nr_pages < lvl_pages);
+		BUG_ON(sg_res < lvl_pages);
+
+		nr_pages -= lvl_pages;
+		iov_pfn += lvl_pages;
+		phys_pfn += lvl_pages;
+		pteval += lvl_pages * VTD_PAGE_SIZE;
+		sg_res -= lvl_pages;
+
+		/* If the next PTE would be the first in a new page, then we
+		   need to flush the cache on the entries we've just written.
+		   And then we'll need to recalculate 'pte', so clear it and
+		   let it get set again in the if (!pte) block above.
+
+		   If we're done (!nr_pages) we need to flush the cache too.
+
+		   Also if we've been setting superpages, we may need to
+		   recalculate 'pte' and switch back to smaller pages for the
+		   end of the mapping, if the trailing size is not enough to
+		   use another superpage (i.e. sg_res < lvl_pages). */
 		pte++;
-		if (!nr_pages || first_pte_in_page(pte)) {
+		if (!nr_pages || first_pte_in_page(pte) ||
+		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
 			domain_flush_cache(domain, first_pte,
 					   (void *)pte - (void *)first_pte);
 			pte = NULL;
 		}
-		iov_pfn++;
-		pteval += VTD_PAGE_SIZE;
-		sg_res--;
-		if (!sg_res)
+
+		if (!sg_res && nr_pages)
 			sg = sg_next(sg);
 	}
 	return 0;
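
[Editor's note] The new tail of __domain_mapping() advances iov_pfn, phys_pfn and pteval by lvl_pages at a time and falls back to smaller pages when the remainder no longer fills a superpage. A rough sketch of that stepping for a 513-page mapping (one 2MiB chunk plus one trailing 4KiB page); all names are local to the sketch:

#include <stdio.h>

#define STRIDE_SHIFT 9

static unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1UL << ((lvl - 1) * STRIDE_SHIFT);
}

/* Pick the biggest level that both alignment and remaining size allow. */
static unsigned int pick_level(unsigned int support, unsigned long iov_pfn,
			       unsigned long phys_pfn, unsigned long pages)
{
	unsigned long pfnmerge = iov_pfn | phys_pfn;
	unsigned int level = 1;

	while (support && !(pfnmerge & ((1UL << STRIDE_SHIFT) - 1))) {
		pages >>= STRIDE_SHIFT;
		if (!pages)
			break;
		pfnmerge >>= STRIDE_SHIFT;
		level++;
		support--;
	}
	return level;
}

int main(void)
{
	unsigned long iov_pfn = 0, phys_pfn = 0, nr_pages = 513;
	unsigned int support = 1;	/* 2MiB superpages available */

	while (nr_pages > 0) {
		unsigned int lvl = pick_level(support, iov_pfn, phys_pfn, nr_pages);
		unsigned long step = lvl_to_nr_pages(lvl);

		printf("map %lu page(s) at iov_pfn %#lx with a level-%u PTE\n",
		       step, iov_pfn, lvl);
		nr_pages -= step;
		iov_pfn += step;
		phys_pfn += step;
	}
	return 0;
}
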
@@ -2015,7 +2148,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
 	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
 		return 0;
 	return iommu_prepare_identity_map(pdev, rmrr->base_address,
-		rmrr->end_address + 1);
+		rmrr->end_address);
 }
 
 #ifdef CONFIG_DMAR_FLOPPY_WA
@@ -2029,7 +2162,7 @@ static inline void iommu_prepare_isa(void)
 		return;
 
 	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
-	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
+	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
 
 	if (ret)
 		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
@@ -2105,10 +2238,10 @@ static int identity_mapping(struct pci_dev *pdev)
 	if (likely(!iommu_identity_mapping))
 		return 0;
 
+	info = pdev->dev.archdata.iommu;
+	if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
+		return (info->domain == si_domain);
 
-	list_for_each_entry(info, &si_domain->devices, link)
-		if (info->dev == pdev)
-			return 1;
 	return 0;
 }
 
@@ -2186,8 +2319,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
 	 * Assume that they will -- if they turn out not to be, then we can
 	 * take them out of the 1:1 domain later.
 	 */
-	if (!startup)
-		return pdev->dma_mask > DMA_BIT_MASK(32);
+	if (!startup) {
+		/*
+		 * If the device's dma_mask is less than the system's memory
+		 * size then this is not a candidate for identity mapping.
+		 */
+		u64 dma_mask = pdev->dma_mask;
+
+		if (pdev->dev.coherent_dma_mask &&
+		    pdev->dev.coherent_dma_mask < dma_mask)
+			dma_mask = pdev->dev.coherent_dma_mask;
+
+		return dma_mask >= dma_get_required_mask(&pdev->dev);
+	}
 
 	return 1;
 }
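
[Editor's note] The reworked !startup branch compares the device's effective mask (the stricter of dma_mask and coherent_dma_mask) against dma_get_required_mask(), i.e. the mask needed to address all physical memory; a device that cannot reach all of RAM is kept out of the identity map and gets a translated domain instead. A hedged sketch of just that comparison, with the required mask passed in as a plain value:

#include <stdint.h>
#include <stdio.h>

/* required_mask stands in for dma_get_required_mask(&pdev->dev). */
static int can_use_identity_map(uint64_t dma_mask, uint64_t coherent_dma_mask,
				uint64_t required_mask)
{
	uint64_t mask = dma_mask;

	if (coherent_dma_mask && coherent_dma_mask < mask)
		mask = coherent_dma_mask;	/* the more restrictive of the two */

	return mask >= required_mask;
}

int main(void)
{
	uint64_t mask32 = 0xffffffffULL;	/* 32-bit-only device */
	uint64_t mask64 = ~0ULL;		/* 64-bit capable device */
	uint64_t required = 0xfffffffffULL;	/* e.g. 64GiB of RAM present */

	printf("32-bit device: %d\n", can_use_identity_map(mask32, mask32, required));
	printf("64-bit device: %d\n", can_use_identity_map(mask64, mask64, required));
	return 0;
}
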
@@ -2202,6 +2346,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
 		return -EFAULT;
 
 	for_each_pci_dev(pdev) {
+		/* Skip Host/PCI Bridge devices */
+		if (IS_BRIDGE_HOST_DEVICE(pdev))
+			continue;
 		if (iommu_should_identity_map(pdev, 1)) {
 			printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
 			       hw ? "hardware" : "software", pci_name(pdev));
@@ -2217,7 +2364,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
 	return 0;
 }
 
-static int __init init_dmars(int force_on)
+static int __init init_dmars(void)
 {
 	struct dmar_drhd_unit *drhd;
 	struct dmar_rmrr_unit *rmrr;
@@ -2591,8 +2738,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 	iommu = domain_get_iommu(domain);
 	size = aligned_nrpages(paddr, size);
 
-	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
-				pdev->dma_mask);
+	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
 	if (!iova)
 		goto error;
 
@@ -3117,7 +3263,17 @@ static int init_iommu_hw(void)
 		if (iommu->qi)
 			dmar_reenable_qi(iommu);
 
-	for_each_active_iommu(iommu, drhd) {
+	for_each_iommu(iommu, drhd) {
+		if (drhd->ignored) {
+			/*
+			 * we always have to disable PMRs or DMA may fail on
+			 * this device
+			 */
+			if (force_on)
+				iommu_disable_protect_mem_regions(iommu);
+			continue;
+		}
+
 		iommu_flush_write_buffer(iommu);
 
 		iommu_set_root_entry(iommu);
@@ -3126,7 +3282,8 @@ static int init_iommu_hw(void)
 					   DMA_CCMD_GLOBAL_INVL);
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
 					 DMA_TLB_GLOBAL_FLUSH);
-		iommu_enable_translation(iommu);
+		if (iommu_enable_translation(iommu))
+			return 1;
 		iommu_disable_protect_mem_regions(iommu);
 	}
 
@@ -3193,7 +3350,10 @@ static void iommu_resume(void)
 	unsigned long flag;
 
 	if (init_iommu_hw()) {
-		WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
+		if (force_on)
+			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
+		else
+			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
 		return;
 	}
 
@@ -3228,7 +3388,7 @@ static void __init init_iommu_pm_ops(void)
 }
 
 #else
-static inline int init_iommu_pm_ops(void) { }
+static inline void init_iommu_pm_ops(void) {}
 #endif /* CONFIG_PM */
 
 /*
@@ -3270,7 +3430,6 @@ static struct notifier_block device_nb = {
 int __init intel_iommu_init(void)
 {
 	int ret = 0;
-	int force_on = 0;
 
 	/* VT-d is required for a TXT/tboot launch, so enforce that */
 	force_on = tboot_force_iommu();
@@ -3308,7 +3467,7 @@ int __init intel_iommu_init(void)
 
 	init_no_remapping_devices();
 
-	ret = init_dmars(force_on);
+	ret = init_dmars();
 	if (ret) {
 		if (force_on)
 			panic("tboot: Failed to initialize DMARs\n");
@@ -3379,8 +3538,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
 	spin_lock_irqsave(&device_domain_lock, flags);
 	list_for_each_safe(entry, tmp, &domain->devices) {
 		info = list_entry(entry, struct device_domain_info, link);
-		/* No need to compare PCI domain; it has to be the same */
-		if (info->bus == pdev->bus->number &&
+		if (info->segment == pci_domain_nr(pdev->bus) &&
+		    info->bus == pdev->bus->number &&
 		    info->devfn == pdev->devfn) {
 			list_del(&info->link);
 			list_del(&info->global);
@@ -3418,10 +3577,13 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
 		domain_update_iommu_cap(domain);
 		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
 
-		spin_lock_irqsave(&iommu->lock, tmp_flags);
-		clear_bit(domain->id, iommu->domain_ids);
-		iommu->domains[domain->id] = NULL;
-		spin_unlock_irqrestore(&iommu->lock, tmp_flags);
+		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
+		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
+			spin_lock_irqsave(&iommu->lock, tmp_flags);
+			clear_bit(domain->id, iommu->domain_ids);
+			iommu->domains[domain->id] = NULL;
+			spin_unlock_irqrestore(&iommu->lock, tmp_flags);
+		}
 	}
 
 	spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -3504,6 +3666,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
 	domain->iommu_count = 0;
 	domain->iommu_coherency = 0;
 	domain->iommu_snooping = 0;
+	domain->iommu_superpage = 0;
 	domain->max_addr = 0;
 	domain->nid = -1;
 
@@ -3719,7 +3882,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 	struct dma_pte *pte;
 	u64 phys = 0;
 
-	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
+	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
 	if (pte)
 		phys = dma_pte_addr(pte);
 