aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-06-01 16:48:50 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-06-01 16:48:50 -0400
commitf0f52a9463839c52a63c05d6e7d4a330d94a9794 (patch)
treed964f65aa37dd46b457bc615f2f37e75390c6df8
parent0f48f2600911d5de6393829e4a9986d4075558b3 (diff)
parent70e535d1e5d1e4317e894d6228b762cf9c3fbc6a (diff)
Merge git://git.infradead.org/iommu-2.6
* git://git.infradead.org/iommu-2.6: intel-iommu: Fix off-by-one in RMRR setup intel-iommu: Add domain check in domain_remove_one_dev_info intel-iommu: Remove Host Bridge devices from identity mapping intel-iommu: Use coherent DMA mask when requested intel-iommu: Dont cache iova above 32bit intel-iommu: Speed up processing of the identity_mapping function intel-iommu: Check for identity mapping candidate using system dma mask intel-iommu: Only unlink device domains from iommu intel-iommu: Enable super page (2MiB, 1GiB, etc.) support intel-iommu: Flush unmaps at domain_exit intel-iommu: Remove obsolete comment from detect_intel_iommu intel-iommu: fix VT-d PMR disable for TXT on S3 resume
-rw-r--r--Documentation/kernel-parameters.txt5
-rw-r--r--drivers/pci/dmar.c7
-rw-r--r--drivers/pci/intel-iommu.c240
-rw-r--r--drivers/pci/iova.c12
-rw-r--r--include/linux/dma_remapping.h4
5 files changed, 220 insertions, 48 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5438a2d7907f..d9a203b058f1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -999,7 +999,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
999 With this option on every unmap_single operation will 999 With this option on every unmap_single operation will
1000 result in a hardware IOTLB flush operation as opposed 1000 result in a hardware IOTLB flush operation as opposed
1001 to batching them for performance. 1001 to batching them for performance.
1002 1002 sp_off [Default Off]
1003 By default, super page will be supported if Intel IOMMU
1004 has the capability. With this option, super page will
1005 not be supported.
1003 intremap= [X86-64, Intel-IOMMU] 1006 intremap= [X86-64, Intel-IOMMU]
1004 Format: { on (default) | off | nosid } 1007 Format: { on (default) | off | nosid }
1005 on enable Interrupt Remapping (default) 1008 on enable Interrupt Remapping (default)
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 12e02bf92c4a..3dc9befa5aec 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -698,12 +698,7 @@ int __init detect_intel_iommu(void)
698 { 698 {
699#ifdef CONFIG_INTR_REMAP 699#ifdef CONFIG_INTR_REMAP
700 struct acpi_table_dmar *dmar; 700 struct acpi_table_dmar *dmar;
701 /* 701
702 * for now we will disable dma-remapping when interrupt
703 * remapping is enabled.
704 * When support for queued invalidation for IOTLB invalidation
705 * is added, we will not need this any more.
706 */
707 dmar = (struct acpi_table_dmar *) dmar_tbl; 702 dmar = (struct acpi_table_dmar *) dmar_tbl;
708 if (ret && cpu_has_x2apic && dmar->flags & 0x1) 703 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
709 printk(KERN_INFO 704 printk(KERN_INFO
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 6af6b628175b..59f17acf7f68 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -47,6 +47,8 @@
47#define ROOT_SIZE VTD_PAGE_SIZE 47#define ROOT_SIZE VTD_PAGE_SIZE
48#define CONTEXT_SIZE VTD_PAGE_SIZE 48#define CONTEXT_SIZE VTD_PAGE_SIZE
49 49
50#define IS_BRIDGE_HOST_DEVICE(pdev) \
51 ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
50#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) 52#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
51#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) 53#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
52#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) 54#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
@@ -116,6 +118,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level)
116 return (pfn + level_size(level) - 1) & level_mask(level); 118 return (pfn + level_size(level) - 1) & level_mask(level);
117} 119}
118 120
121static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
122{
123 return 1 << ((lvl - 1) * LEVEL_STRIDE);
124}
125
119/* VT-d pages must always be _smaller_ than MM pages. Otherwise things 126/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
120 are never going to work. */ 127 are never going to work. */
121static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) 128static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
@@ -143,6 +150,12 @@ static void __init check_tylersburg_isoch(void);
143static int rwbf_quirk; 150static int rwbf_quirk;
144 151
145/* 152/*
153 * set to 1 to panic kernel if can't successfully enable VT-d
154 * (used when kernel is launched w/ TXT)
155 */
156static int force_on = 0;
157
158/*
146 * 0: Present 159 * 0: Present
147 * 1-11: Reserved 160 * 1-11: Reserved
148 * 12-63: Context Ptr (12 - (haw-1)) 161 * 12-63: Context Ptr (12 - (haw-1))
@@ -338,6 +351,9 @@ struct dmar_domain {
338 int iommu_coherency;/* indicate coherency of iommu access */ 351 int iommu_coherency;/* indicate coherency of iommu access */
339 int iommu_snooping; /* indicate snooping control feature*/ 352 int iommu_snooping; /* indicate snooping control feature*/
340 int iommu_count; /* reference count of iommu */ 353 int iommu_count; /* reference count of iommu */
354 int iommu_superpage;/* Level of superpages supported:
355 0 == 4KiB (no superpages), 1 == 2MiB,
356 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
341 spinlock_t iommu_lock; /* protect iommu set in domain */ 357 spinlock_t iommu_lock; /* protect iommu set in domain */
342 u64 max_addr; /* maximum mapped address */ 358 u64 max_addr; /* maximum mapped address */
343}; 359};
@@ -387,6 +403,7 @@ int dmar_disabled = 1;
387static int dmar_map_gfx = 1; 403static int dmar_map_gfx = 1;
388static int dmar_forcedac; 404static int dmar_forcedac;
389static int intel_iommu_strict; 405static int intel_iommu_strict;
406static int intel_iommu_superpage = 1;
390 407
391#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) 408#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
392static DEFINE_SPINLOCK(device_domain_lock); 409static DEFINE_SPINLOCK(device_domain_lock);
@@ -417,6 +434,10 @@ static int __init intel_iommu_setup(char *str)
417 printk(KERN_INFO 434 printk(KERN_INFO
418 "Intel-IOMMU: disable batched IOTLB flush\n"); 435 "Intel-IOMMU: disable batched IOTLB flush\n");
419 intel_iommu_strict = 1; 436 intel_iommu_strict = 1;
437 } else if (!strncmp(str, "sp_off", 6)) {
438 printk(KERN_INFO
439 "Intel-IOMMU: disable supported super page\n");
440 intel_iommu_superpage = 0;
420 } 441 }
421 442
422 str += strcspn(str, ","); 443 str += strcspn(str, ",");
@@ -555,11 +576,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain)
555 } 576 }
556} 577}
557 578
579static void domain_update_iommu_superpage(struct dmar_domain *domain)
580{
581 int i, mask = 0xf;
582
583 if (!intel_iommu_superpage) {
584 domain->iommu_superpage = 0;
585 return;
586 }
587
588 domain->iommu_superpage = 4; /* 1TiB */
589
590 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
591 mask |= cap_super_page_val(g_iommus[i]->cap);
592 if (!mask) {
593 break;
594 }
595 }
596 domain->iommu_superpage = fls(mask);
597}
598
558/* Some capabilities may be different across iommus */ 599/* Some capabilities may be different across iommus */
559static void domain_update_iommu_cap(struct dmar_domain *domain) 600static void domain_update_iommu_cap(struct dmar_domain *domain)
560{ 601{
561 domain_update_iommu_coherency(domain); 602 domain_update_iommu_coherency(domain);
562 domain_update_iommu_snooping(domain); 603 domain_update_iommu_snooping(domain);
604 domain_update_iommu_superpage(domain);
563} 605}
564 606
565static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) 607static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
@@ -689,23 +731,31 @@ out:
689} 731}
690 732
691static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, 733static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
692 unsigned long pfn) 734 unsigned long pfn, int large_level)
693{ 735{
694 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; 736 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
695 struct dma_pte *parent, *pte = NULL; 737 struct dma_pte *parent, *pte = NULL;
696 int level = agaw_to_level(domain->agaw); 738 int level = agaw_to_level(domain->agaw);
697 int offset; 739 int offset, target_level;
698 740
699 BUG_ON(!domain->pgd); 741 BUG_ON(!domain->pgd);
700 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); 742 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
701 parent = domain->pgd; 743 parent = domain->pgd;
702 744
745 /* Search pte */
746 if (!large_level)
747 target_level = 1;
748 else
749 target_level = large_level;
750
703 while (level > 0) { 751 while (level > 0) {
704 void *tmp_page; 752 void *tmp_page;
705 753
706 offset = pfn_level_offset(pfn, level); 754 offset = pfn_level_offset(pfn, level);
707 pte = &parent[offset]; 755 pte = &parent[offset];
708 if (level == 1) 756 if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
757 break;
758 if (level == target_level)
709 break; 759 break;
710 760
711 if (!dma_pte_present(pte)) { 761 if (!dma_pte_present(pte)) {
@@ -733,10 +783,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
733 return pte; 783 return pte;
734} 784}
735 785
786
736/* return address's pte at specific level */ 787/* return address's pte at specific level */
737static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, 788static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
738 unsigned long pfn, 789 unsigned long pfn,
739 int level) 790 int level, int *large_page)
740{ 791{
741 struct dma_pte *parent, *pte = NULL; 792 struct dma_pte *parent, *pte = NULL;
742 int total = agaw_to_level(domain->agaw); 793 int total = agaw_to_level(domain->agaw);
@@ -749,8 +800,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
749 if (level == total) 800 if (level == total)
750 return pte; 801 return pte;
751 802
752 if (!dma_pte_present(pte)) 803 if (!dma_pte_present(pte)) {
804 *large_page = total;
753 break; 805 break;
806 }
807
808 if (pte->val & DMA_PTE_LARGE_PAGE) {
809 *large_page = total;
810 return pte;
811 }
812
754 parent = phys_to_virt(dma_pte_addr(pte)); 813 parent = phys_to_virt(dma_pte_addr(pte));
755 total--; 814 total--;
756 } 815 }
@@ -763,6 +822,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
763 unsigned long last_pfn) 822 unsigned long last_pfn)
764{ 823{
765 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; 824 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
825 unsigned int large_page = 1;
766 struct dma_pte *first_pte, *pte; 826 struct dma_pte *first_pte, *pte;
767 827
768 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); 828 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
@@ -771,14 +831,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
771 831
772 /* we don't need lock here; nobody else touches the iova range */ 832 /* we don't need lock here; nobody else touches the iova range */
773 do { 833 do {
774 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); 834 large_page = 1;
835 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
775 if (!pte) { 836 if (!pte) {
776 start_pfn = align_to_level(start_pfn + 1, 2); 837 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
777 continue; 838 continue;
778 } 839 }
779 do { 840 do {
780 dma_clear_pte(pte); 841 dma_clear_pte(pte);
781 start_pfn++; 842 start_pfn += lvl_to_nr_pages(large_page);
782 pte++; 843 pte++;
783 } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); 844 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
784 845
@@ -798,6 +859,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
798 int total = agaw_to_level(domain->agaw); 859 int total = agaw_to_level(domain->agaw);
799 int level; 860 int level;
800 unsigned long tmp; 861 unsigned long tmp;
862 int large_page = 2;
801 863
802 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); 864 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
803 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); 865 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
@@ -813,7 +875,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
813 return; 875 return;
814 876
815 do { 877 do {
816 first_pte = pte = dma_pfn_level_pte(domain, tmp, level); 878 large_page = level;
879 first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
880 if (large_page > level)
881 level = large_page + 1;
817 if (!pte) { 882 if (!pte) {
818 tmp = align_to_level(tmp + 1, level + 1); 883 tmp = align_to_level(tmp + 1, level + 1);
819 continue; 884 continue;
@@ -1397,6 +1462,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1397 else 1462 else
1398 domain->iommu_snooping = 0; 1463 domain->iommu_snooping = 0;
1399 1464
1465 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1400 domain->iommu_count = 1; 1466 domain->iommu_count = 1;
1401 domain->nid = iommu->node; 1467 domain->nid = iommu->node;
1402 1468
@@ -1417,6 +1483,10 @@ static void domain_exit(struct dmar_domain *domain)
1417 if (!domain) 1483 if (!domain)
1418 return; 1484 return;
1419 1485
1486 /* Flush any lazy unmaps that may reference this domain */
1487 if (!intel_iommu_strict)
1488 flush_unmaps_timeout(0);
1489
1420 domain_remove_dev_info(domain); 1490 domain_remove_dev_info(domain);
1421 /* destroy iovas */ 1491 /* destroy iovas */
1422 put_iova_domain(&domain->iovad); 1492 put_iova_domain(&domain->iovad);
@@ -1648,6 +1718,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr,
1648 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; 1718 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1649} 1719}
1650 1720
1721/* Return largest possible superpage level for a given mapping */
1722static inline int hardware_largepage_caps(struct dmar_domain *domain,
1723 unsigned long iov_pfn,
1724 unsigned long phy_pfn,
1725 unsigned long pages)
1726{
1727 int support, level = 1;
1728 unsigned long pfnmerge;
1729
1730 support = domain->iommu_superpage;
1731
1732 /* To use a large page, the virtual *and* physical addresses
1733 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1734 of them will mean we have to use smaller pages. So just
1735 merge them and check both at once. */
1736 pfnmerge = iov_pfn | phy_pfn;
1737
1738 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1739 pages >>= VTD_STRIDE_SHIFT;
1740 if (!pages)
1741 break;
1742 pfnmerge >>= VTD_STRIDE_SHIFT;
1743 level++;
1744 support--;
1745 }
1746 return level;
1747}
1748
1651static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 1749static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1652 struct scatterlist *sg, unsigned long phys_pfn, 1750 struct scatterlist *sg, unsigned long phys_pfn,
1653 unsigned long nr_pages, int prot) 1751 unsigned long nr_pages, int prot)
@@ -1656,6 +1754,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1656 phys_addr_t uninitialized_var(pteval); 1754 phys_addr_t uninitialized_var(pteval);
1657 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; 1755 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1658 unsigned long sg_res; 1756 unsigned long sg_res;
1757 unsigned int largepage_lvl = 0;
1758 unsigned long lvl_pages = 0;
1659 1759
1660 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); 1760 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1661 1761
@@ -1671,7 +1771,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1671 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; 1771 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1672 } 1772 }
1673 1773
1674 while (nr_pages--) { 1774 while (nr_pages > 0) {
1675 uint64_t tmp; 1775 uint64_t tmp;
1676 1776
1677 if (!sg_res) { 1777 if (!sg_res) {
@@ -1679,11 +1779,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1679 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; 1779 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1680 sg->dma_length = sg->length; 1780 sg->dma_length = sg->length;
1681 pteval = page_to_phys(sg_page(sg)) | prot; 1781 pteval = page_to_phys(sg_page(sg)) | prot;
1782 phys_pfn = pteval >> VTD_PAGE_SHIFT;
1682 } 1783 }
1784
1683 if (!pte) { 1785 if (!pte) {
1684 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); 1786 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1787
1788 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1685 if (!pte) 1789 if (!pte)
1686 return -ENOMEM; 1790 return -ENOMEM;
1791 /* It is large page*/
1792 if (largepage_lvl > 1)
1793 pteval |= DMA_PTE_LARGE_PAGE;
1794 else
1795 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1796
1687 } 1797 }
1688 /* We don't need lock here, nobody else 1798 /* We don't need lock here, nobody else
1689 * touches the iova range 1799 * touches the iova range
@@ -1699,16 +1809,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1699 } 1809 }
1700 WARN_ON(1); 1810 WARN_ON(1);
1701 } 1811 }
1812
1813 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1814
1815 BUG_ON(nr_pages < lvl_pages);
1816 BUG_ON(sg_res < lvl_pages);
1817
1818 nr_pages -= lvl_pages;
1819 iov_pfn += lvl_pages;
1820 phys_pfn += lvl_pages;
1821 pteval += lvl_pages * VTD_PAGE_SIZE;
1822 sg_res -= lvl_pages;
1823
1824 /* If the next PTE would be the first in a new page, then we
1825 need to flush the cache on the entries we've just written.
1826 And then we'll need to recalculate 'pte', so clear it and
1827 let it get set again in the if (!pte) block above.
1828
1829 If we're done (!nr_pages) we need to flush the cache too.
1830
1831 Also if we've been setting superpages, we may need to
1832 recalculate 'pte' and switch back to smaller pages for the
1833 end of the mapping, if the trailing size is not enough to
1834 use another superpage (i.e. sg_res < lvl_pages). */
1702 pte++; 1835 pte++;
1703 if (!nr_pages || first_pte_in_page(pte)) { 1836 if (!nr_pages || first_pte_in_page(pte) ||
1837 (largepage_lvl > 1 && sg_res < lvl_pages)) {
1704 domain_flush_cache(domain, first_pte, 1838 domain_flush_cache(domain, first_pte,
1705 (void *)pte - (void *)first_pte); 1839 (void *)pte - (void *)first_pte);
1706 pte = NULL; 1840 pte = NULL;
1707 } 1841 }
1708 iov_pfn++; 1842
1709 pteval += VTD_PAGE_SIZE; 1843 if (!sg_res && nr_pages)
1710 sg_res--;
1711 if (!sg_res)
1712 sg = sg_next(sg); 1844 sg = sg_next(sg);
1713 } 1845 }
1714 return 0; 1846 return 0;
@@ -2016,7 +2148,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2016 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2148 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2017 return 0; 2149 return 0;
2018 return iommu_prepare_identity_map(pdev, rmrr->base_address, 2150 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2019 rmrr->end_address + 1); 2151 rmrr->end_address);
2020} 2152}
2021 2153
2022#ifdef CONFIG_DMAR_FLOPPY_WA 2154#ifdef CONFIG_DMAR_FLOPPY_WA
@@ -2030,7 +2162,7 @@ static inline void iommu_prepare_isa(void)
2030 return; 2162 return;
2031 2163
2032 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); 2164 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2033 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); 2165 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2034 2166
2035 if (ret) 2167 if (ret)
2036 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " 2168 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
@@ -2106,10 +2238,10 @@ static int identity_mapping(struct pci_dev *pdev)
2106 if (likely(!iommu_identity_mapping)) 2238 if (likely(!iommu_identity_mapping))
2107 return 0; 2239 return 0;
2108 2240
2241 info = pdev->dev.archdata.iommu;
2242 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2243 return (info->domain == si_domain);
2109 2244
2110 list_for_each_entry(info, &si_domain->devices, link)
2111 if (info->dev == pdev)
2112 return 1;
2113 return 0; 2245 return 0;
2114} 2246}
2115 2247
@@ -2187,8 +2319,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2187 * Assume that they will -- if they turn out not to be, then we can 2319 * Assume that they will -- if they turn out not to be, then we can
2188 * take them out of the 1:1 domain later. 2320 * take them out of the 1:1 domain later.
2189 */ 2321 */
2190 if (!startup) 2322 if (!startup) {
2191 return pdev->dma_mask > DMA_BIT_MASK(32); 2323 /*
2324 * If the device's dma_mask is less than the system's memory
2325 * size then this is not a candidate for identity mapping.
2326 */
2327 u64 dma_mask = pdev->dma_mask;
2328
2329 if (pdev->dev.coherent_dma_mask &&
2330 pdev->dev.coherent_dma_mask < dma_mask)
2331 dma_mask = pdev->dev.coherent_dma_mask;
2332
2333 return dma_mask >= dma_get_required_mask(&pdev->dev);
2334 }
2192 2335
2193 return 1; 2336 return 1;
2194} 2337}
@@ -2203,6 +2346,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
2203 return -EFAULT; 2346 return -EFAULT;
2204 2347
2205 for_each_pci_dev(pdev) { 2348 for_each_pci_dev(pdev) {
2349 /* Skip Host/PCI Bridge devices */
2350 if (IS_BRIDGE_HOST_DEVICE(pdev))
2351 continue;
2206 if (iommu_should_identity_map(pdev, 1)) { 2352 if (iommu_should_identity_map(pdev, 1)) {
2207 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n", 2353 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2208 hw ? "hardware" : "software", pci_name(pdev)); 2354 hw ? "hardware" : "software", pci_name(pdev));
@@ -2218,7 +2364,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
2218 return 0; 2364 return 0;
2219} 2365}
2220 2366
2221static int __init init_dmars(int force_on) 2367static int __init init_dmars(void)
2222{ 2368{
2223 struct dmar_drhd_unit *drhd; 2369 struct dmar_drhd_unit *drhd;
2224 struct dmar_rmrr_unit *rmrr; 2370 struct dmar_rmrr_unit *rmrr;
@@ -2592,8 +2738,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2592 iommu = domain_get_iommu(domain); 2738 iommu = domain_get_iommu(domain);
2593 size = aligned_nrpages(paddr, size); 2739 size = aligned_nrpages(paddr, size);
2594 2740
2595 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), 2741 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2596 pdev->dma_mask);
2597 if (!iova) 2742 if (!iova)
2598 goto error; 2743 goto error;
2599 2744
@@ -3118,7 +3263,17 @@ static int init_iommu_hw(void)
3118 if (iommu->qi) 3263 if (iommu->qi)
3119 dmar_reenable_qi(iommu); 3264 dmar_reenable_qi(iommu);
3120 3265
3121 for_each_active_iommu(iommu, drhd) { 3266 for_each_iommu(iommu, drhd) {
3267 if (drhd->ignored) {
3268 /*
3269 * we always have to disable PMRs or DMA may fail on
3270 * this device
3271 */
3272 if (force_on)
3273 iommu_disable_protect_mem_regions(iommu);
3274 continue;
3275 }
3276
3122 iommu_flush_write_buffer(iommu); 3277 iommu_flush_write_buffer(iommu);
3123 3278
3124 iommu_set_root_entry(iommu); 3279 iommu_set_root_entry(iommu);
@@ -3127,7 +3282,8 @@ static int init_iommu_hw(void)
3127 DMA_CCMD_GLOBAL_INVL); 3282 DMA_CCMD_GLOBAL_INVL);
3128 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 3283 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3129 DMA_TLB_GLOBAL_FLUSH); 3284 DMA_TLB_GLOBAL_FLUSH);
3130 iommu_enable_translation(iommu); 3285 if (iommu_enable_translation(iommu))
3286 return 1;
3131 iommu_disable_protect_mem_regions(iommu); 3287 iommu_disable_protect_mem_regions(iommu);
3132 } 3288 }
3133 3289
@@ -3194,7 +3350,10 @@ static void iommu_resume(void)
3194 unsigned long flag; 3350 unsigned long flag;
3195 3351
3196 if (init_iommu_hw()) { 3352 if (init_iommu_hw()) {
3197 WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); 3353 if (force_on)
3354 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3355 else
3356 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3198 return; 3357 return;
3199 } 3358 }
3200 3359
@@ -3271,7 +3430,6 @@ static struct notifier_block device_nb = {
3271int __init intel_iommu_init(void) 3430int __init intel_iommu_init(void)
3272{ 3431{
3273 int ret = 0; 3432 int ret = 0;
3274 int force_on = 0;
3275 3433
3276 /* VT-d is required for a TXT/tboot launch, so enforce that */ 3434 /* VT-d is required for a TXT/tboot launch, so enforce that */
3277 force_on = tboot_force_iommu(); 3435 force_on = tboot_force_iommu();
@@ -3309,7 +3467,7 @@ int __init intel_iommu_init(void)
3309 3467
3310 init_no_remapping_devices(); 3468 init_no_remapping_devices();
3311 3469
3312 ret = init_dmars(force_on); 3470 ret = init_dmars();
3313 if (ret) { 3471 if (ret) {
3314 if (force_on) 3472 if (force_on)
3315 panic("tboot: Failed to initialize DMARs\n"); 3473 panic("tboot: Failed to initialize DMARs\n");
@@ -3380,8 +3538,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
3380 spin_lock_irqsave(&device_domain_lock, flags); 3538 spin_lock_irqsave(&device_domain_lock, flags);
3381 list_for_each_safe(entry, tmp, &domain->devices) { 3539 list_for_each_safe(entry, tmp, &domain->devices) {
3382 info = list_entry(entry, struct device_domain_info, link); 3540 info = list_entry(entry, struct device_domain_info, link);
3383 /* No need to compare PCI domain; it has to be the same */ 3541 if (info->segment == pci_domain_nr(pdev->bus) &&
3384 if (info->bus == pdev->bus->number && 3542 info->bus == pdev->bus->number &&
3385 info->devfn == pdev->devfn) { 3543 info->devfn == pdev->devfn) {
3386 list_del(&info->link); 3544 list_del(&info->link);
3387 list_del(&info->global); 3545 list_del(&info->global);
@@ -3419,10 +3577,13 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
3419 domain_update_iommu_cap(domain); 3577 domain_update_iommu_cap(domain);
3420 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); 3578 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3421 3579
3422 spin_lock_irqsave(&iommu->lock, tmp_flags); 3580 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3423 clear_bit(domain->id, iommu->domain_ids); 3581 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3424 iommu->domains[domain->id] = NULL; 3582 spin_lock_irqsave(&iommu->lock, tmp_flags);
3425 spin_unlock_irqrestore(&iommu->lock, tmp_flags); 3583 clear_bit(domain->id, iommu->domain_ids);
3584 iommu->domains[domain->id] = NULL;
3585 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3586 }
3426 } 3587 }
3427 3588
3428 spin_unlock_irqrestore(&device_domain_lock, flags); 3589 spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -3505,6 +3666,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
3505 domain->iommu_count = 0; 3666 domain->iommu_count = 0;
3506 domain->iommu_coherency = 0; 3667 domain->iommu_coherency = 0;
3507 domain->iommu_snooping = 0; 3668 domain->iommu_snooping = 0;
3669 domain->iommu_superpage = 0;
3508 domain->max_addr = 0; 3670 domain->max_addr = 0;
3509 domain->nid = -1; 3671 domain->nid = -1;
3510 3672
@@ -3720,7 +3882,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3720 struct dma_pte *pte; 3882 struct dma_pte *pte;
3721 u64 phys = 0; 3883 u64 phys = 0;
3722 3884
3723 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT); 3885 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
3724 if (pte) 3886 if (pte)
3725 phys = dma_pte_addr(pte); 3887 phys = dma_pte_addr(pte);
3726 3888
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
index 9606e599a475..c5c274ab5c5a 100644
--- a/drivers/pci/iova.c
+++ b/drivers/pci/iova.c
@@ -63,8 +63,16 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
63 curr = iovad->cached32_node; 63 curr = iovad->cached32_node;
64 cached_iova = container_of(curr, struct iova, node); 64 cached_iova = container_of(curr, struct iova, node);
65 65
66 if (free->pfn_lo >= cached_iova->pfn_lo) 66 if (free->pfn_lo >= cached_iova->pfn_lo) {
67 iovad->cached32_node = rb_next(&free->node); 67 struct rb_node *node = rb_next(&free->node);
68 struct iova *iova = container_of(node, struct iova, node);
69
70 /* only cache if it's below 32bit pfn */
71 if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
72 iovad->cached32_node = node;
73 else
74 iovad->cached32_node = NULL;
75 }
68} 76}
69 77
70/* Computes the padding size required, to make the 78/* Computes the padding size required, to make the
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 5619f8522738..bbd8661b3473 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -9,8 +9,12 @@
9#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) 9#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
10#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) 10#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
11 11
12#define VTD_STRIDE_SHIFT (9)
13#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
14
12#define DMA_PTE_READ (1) 15#define DMA_PTE_READ (1)
13#define DMA_PTE_WRITE (2) 16#define DMA_PTE_WRITE (2)
17#define DMA_PTE_LARGE_PAGE (1 << 7)
14#define DMA_PTE_SNP (1 << 11) 18#define DMA_PTE_SNP (1 << 11)
15 19
16#define CONTEXT_TT_MULTI_LEVEL 0 20#define CONTEXT_TT_MULTI_LEVEL 0