Diffstat (limited to 'drivers/pci/intel-iommu.c')
-rw-r--r--  drivers/pci/intel-iommu.c | 324
1 file changed, 249 insertions(+), 75 deletions(-)
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 4789f8e8bf7a..f02c34d26d1b 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -36,9 +36,10 @@
 #include <linux/iova.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/tboot.h>
 #include <linux/dmi.h>
+#include <linux/pci-ats.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
 #include "pci.h"
@@ -46,6 +47,8 @@
 #define ROOT_SIZE		VTD_PAGE_SIZE
 #define CONTEXT_SIZE		VTD_PAGE_SIZE
 
+#define IS_BRIDGE_HOST_DEVICE(pdev) \
+			    ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
@@ -115,6 +118,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level)
 	return (pfn + level_size(level) - 1) & level_mask(level);
 }
 
+static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
+{
+	return 1 << ((lvl - 1) * LEVEL_STRIDE);
+}
+
 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
    are never going to work. */
 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
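
The lvl_to_nr_pages() helper added above converts a page-table level into the number of 4KiB pages a single PTE at that level covers. A minimal user-space sketch of the same arithmetic, assuming the 9-bit per-level stride used by VT-d (the LEVEL_STRIDE value here is an assumption for illustration, not quoted from the header):

#include <stdio.h>

#define LEVEL_STRIDE 9	/* assumed 9-bit stride per page-table level */

/* Same arithmetic as the helper above: 4KiB pages covered by one PTE
 * at level 'lvl' (1 = 4KiB page, 2 = 2MiB superpage, ...). */
static unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1UL << ((lvl - 1) * LEVEL_STRIDE);
}

int main(void)
{
	unsigned int lvl;

	for (lvl = 1; lvl <= 4; lvl++)
		printf("level %u -> %lu pages (%lu KiB)\n",
		       lvl, lvl_to_nr_pages(lvl), lvl_to_nr_pages(lvl) * 4);
	/* level 1 -> 1 (4 KiB), level 2 -> 512 (2 MiB),
	 * level 3 -> 262144 (1 GiB), level 4 -> 134217728 (512 GiB) */
	return 0;
}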
@@ -142,6 +150,12 @@ static void __init check_tylersburg_isoch(void);
 static int rwbf_quirk;
 
 /*
+ * set to 1 to panic kernel if can't successfully enable VT-d
+ * (used when kernel is launched w/ TXT)
+ */
+static int force_on = 0;
+
+/*
  * 0: Present
  * 1-11: Reserved
  * 12-63: Context Ptr (12 - (haw-1))
@@ -337,6 +351,9 @@ struct dmar_domain {
 	int		iommu_coherency;/* indicate coherency of iommu access */
 	int		iommu_snooping; /* indicate snooping control feature*/
 	int		iommu_count;	/* reference count of iommu */
+	int		iommu_superpage;/* Level of superpages supported:
+					   0 == 4KiB (no superpages), 1 == 2MiB,
+					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
 	spinlock_t	iommu_lock;	/* protect iommu set in domain */
 	u64		max_addr;	/* maximum mapped address */
 };
@@ -386,6 +403,7 @@ int dmar_disabled = 1;
 static int dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int intel_iommu_strict;
+static int intel_iommu_superpage = 1;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -416,6 +434,10 @@ static int __init intel_iommu_setup(char *str)
 			printk(KERN_INFO
 				"Intel-IOMMU: disable batched IOTLB flush\n");
 			intel_iommu_strict = 1;
+		} else if (!strncmp(str, "sp_off", 6)) {
+			printk(KERN_INFO
+				"Intel-IOMMU: disable supported super page\n");
+			intel_iommu_superpage = 0;
 		}
 
 		str += strcspn(str, ",");
@@ -554,11 +576,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain)
 	}
 }
 
+static void domain_update_iommu_superpage(struct dmar_domain *domain)
+{
+	int i, mask = 0xf;
+
+	if (!intel_iommu_superpage) {
+		domain->iommu_superpage = 0;
+		return;
+	}
+
+	domain->iommu_superpage = 4; /* 1TiB */
+
+	for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
+		mask |= cap_super_page_val(g_iommus[i]->cap);
+		if (!mask) {
+			break;
+		}
+	}
+	domain->iommu_superpage = fls(mask);
+}
+
 /* Some capabilities may be different across iommus */
 static void domain_update_iommu_cap(struct dmar_domain *domain)
 {
 	domain_update_iommu_coherency(domain);
 	domain_update_iommu_snooping(domain);
+	domain_update_iommu_superpage(domain);
 }
 
 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
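
The fls() call above picks the largest superpage level the hardware capability bitmask allows; the struct hunk earlier gives the meaning of the result (0 == 4KiB only, 1 == 2MiB, 2 == 1GiB, ...). A small user-space sketch of that conversion, with the bitmask semantics (bit 0 = 2MiB support, bit 1 = 1GiB support) taken as an assumption for illustration:

#include <stdio.h>

/* Stand-in for the kernel's fls(): position of the highest set bit,
 * counting from 1, or 0 when no bit is set. */
static int fls_portable(unsigned int x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	/* Assumed capability masks: bit 0 = 2MiB pages, bit 1 = 1GiB pages. */
	unsigned int masks[] = { 0x0, 0x1, 0x3 };
	const char *label[] = { "no superpages", "up to 2MiB", "up to 1GiB" };
	int i;

	for (i = 0; i < 3; i++)
		printf("mask %#x -> iommu_superpage = %d (%s)\n",
		       masks[i], fls_portable(masks[i]), label[i]);
	return 0;
}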
@@ -688,23 +731,31 @@ out:
 }
 
 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
-				      unsigned long pfn)
+				      unsigned long pfn, int large_level)
 {
 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 	struct dma_pte *parent, *pte = NULL;
 	int level = agaw_to_level(domain->agaw);
-	int offset;
+	int offset, target_level;
 
 	BUG_ON(!domain->pgd);
 	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
 	parent = domain->pgd;
 
+	/* Search pte */
+	if (!large_level)
+		target_level = 1;
+	else
+		target_level = large_level;
+
 	while (level > 0) {
 		void *tmp_page;
 
 		offset = pfn_level_offset(pfn, level);
 		pte = &parent[offset];
-		if (level == 1)
+		if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
+			break;
+		if (level == target_level)
 			break;
 
 		if (!dma_pte_present(pte)) {
@@ -732,10 +783,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 	return pte;
 }
 
+
 /* return address's pte at specific level */
 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
 					 unsigned long pfn,
-					 int level)
+					 int level, int *large_page)
 {
 	struct dma_pte *parent, *pte = NULL;
 	int total = agaw_to_level(domain->agaw);
@@ -748,8 +800,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
 		if (level == total)
 			return pte;
 
-		if (!dma_pte_present(pte))
+		if (!dma_pte_present(pte)) {
+			*large_page = total;
 			break;
+		}
+
+		if (pte->val & DMA_PTE_LARGE_PAGE) {
+			*large_page = total;
+			return pte;
+		}
+
 		parent = phys_to_virt(dma_pte_addr(pte));
 		total--;
 	}
@@ -762,6 +822,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
 				  unsigned long last_pfn)
 {
 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+	unsigned int large_page = 1;
 	struct dma_pte *first_pte, *pte;
 
 	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
@@ -770,14 +831,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
 
 	/* we don't need lock here; nobody else touches the iova range */
 	do {
-		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
+		large_page = 1;
+		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
 		if (!pte) {
-			start_pfn = align_to_level(start_pfn + 1, 2);
+			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
 			continue;
 		}
 		do {
 			dma_clear_pte(pte);
-			start_pfn++;
+			start_pfn += lvl_to_nr_pages(large_page);
 			pte++;
 		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
 
@@ -797,6 +859,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
 	int total = agaw_to_level(domain->agaw);
 	int level;
 	unsigned long tmp;
+	int large_page = 2;
 
 	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
 	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
@@ -812,7 +875,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
 		return;
 
 	do {
-		first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
+		large_page = level;
+		first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
+		if (large_page > level)
+			level = large_page + 1;
 		if (!pte) {
 			tmp = align_to_level(tmp + 1, level + 1);
 			continue;
@@ -1206,7 +1272,7 @@ void free_dmar_iommu(struct intel_iommu *iommu)
 	iommu_disable_translation(iommu);
 
 	if (iommu->irq) {
-		set_irq_data(iommu->irq, NULL);
+		irq_set_handler_data(iommu->irq, NULL);
 		/* This will mask the irq */
 		free_irq(iommu->irq, iommu);
 		destroy_irq(iommu->irq);
@@ -1299,7 +1365,7 @@ static void iommu_detach_domain(struct dmar_domain *domain,
 static struct iova_domain reserved_iova_list;
 static struct lock_class_key reserved_rbtree_key;
 
-static void dmar_init_reserved_ranges(void)
+static int dmar_init_reserved_ranges(void)
 {
 	struct pci_dev *pdev = NULL;
 	struct iova *iova;
@@ -1313,8 +1379,10 @@ static void dmar_init_reserved_ranges(void)
 	/* IOAPIC ranges shouldn't be accessed by DMA */
 	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
 		IOVA_PFN(IOAPIC_RANGE_END));
-	if (!iova)
+	if (!iova) {
 		printk(KERN_ERR "Reserve IOAPIC range failed\n");
+		return -ENODEV;
+	}
 
 	/* Reserve all PCI MMIO to avoid peer-to-peer access */
 	for_each_pci_dev(pdev) {
@@ -1327,11 +1395,13 @@ static void dmar_init_reserved_ranges(void)
 			iova = reserve_iova(&reserved_iova_list,
 					    IOVA_PFN(r->start),
 					    IOVA_PFN(r->end));
-			if (!iova)
+			if (!iova) {
 				printk(KERN_ERR "Reserve iova failed\n");
+				return -ENODEV;
+			}
 		}
 	}
-
+	return 0;
 }
 
 static void domain_reserve_special_ranges(struct dmar_domain *domain)
@@ -1392,6 +1462,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 	else
 		domain->iommu_snooping = 0;
 
+	domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
 	domain->iommu_count = 1;
 	domain->nid = iommu->node;
 
@@ -1412,6 +1483,10 @@ static void domain_exit(struct dmar_domain *domain)
 	if (!domain)
 		return;
 
+	/* Flush any lazy unmaps that may reference this domain */
+	if (!intel_iommu_strict)
+		flush_unmaps_timeout(0);
+
 	domain_remove_dev_info(domain);
 	/* destroy iovas */
 	put_iova_domain(&domain->iovad);
@@ -1643,6 +1718,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr,
 	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
 }
 
+/* Return largest possible superpage level for a given mapping */
+static inline int hardware_largepage_caps(struct dmar_domain *domain,
+					  unsigned long iov_pfn,
+					  unsigned long phy_pfn,
+					  unsigned long pages)
+{
+	int support, level = 1;
+	unsigned long pfnmerge;
+
+	support = domain->iommu_superpage;
+
+	/* To use a large page, the virtual *and* physical addresses
+	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
+	   of them will mean we have to use smaller pages. So just
+	   merge them and check both at once. */
+	pfnmerge = iov_pfn | phy_pfn;
+
+	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
+		pages >>= VTD_STRIDE_SHIFT;
+		if (!pages)
+			break;
+		pfnmerge >>= VTD_STRIDE_SHIFT;
+		level++;
+		support--;
+	}
+	return level;
+}
+
 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			    struct scatterlist *sg, unsigned long phys_pfn,
 			    unsigned long nr_pages, int prot)
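
The alignment test in hardware_largepage_caps() is easiest to see with numbers. The sketch below mirrors its loop under two assumptions labelled in the code: a 9-bit stride, and reading '!(pfnmerge & ~VTD_STRIDE_MASK)' as "the low nine bits of both pfns are clear". hw_superpage_level() is a hypothetical name for this stand-alone copy, not a driver function:

#include <stdio.h>

#define STRIDE_SHIFT 9				/* assumed VT-d stride */
#define STRIDE_LOW_BITS ((1UL << STRIDE_SHIFT) - 1)

/* Hypothetical mirror of hardware_largepage_caps(): largest level usable
 * for a mapping, given the IOVA pfn, the physical pfn, the remaining page
 * count, and the domain's supported superpage level. */
static int hw_superpage_level(unsigned long iov_pfn, unsigned long phy_pfn,
			      unsigned long pages, int support)
{
	unsigned long pfnmerge = iov_pfn | phy_pfn;	/* both must be aligned */
	int level = 1;

	while (support && !(pfnmerge & STRIDE_LOW_BITS)) {
		pages >>= STRIDE_SHIFT;
		if (!pages)		/* not enough left for a bigger page */
			break;
		pfnmerge >>= STRIDE_SHIFT;
		level++;
		support--;
	}
	return level;
}

int main(void)
{
	/* 2MiB-aligned IOVA and physical address, 4MiB worth of 4KiB pages:
	 * a level-2 (2MiB) superpage can be used. */
	printf("%d\n", hw_superpage_level(0x200, 0x400, 1024, 3));	/* 2 */

	/* Misaligned physical pfn: fall back to 4KiB pages (level 1). */
	printf("%d\n", hw_superpage_level(0x200, 0x401, 1024, 3));	/* 1 */

	/* Aligned but only 256 pages (1MiB) left: still level 1. */
	printf("%d\n", hw_superpage_level(0x200, 0x400, 256, 3));	/* 1 */
	return 0;
}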
@@ -1651,6 +1754,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 	phys_addr_t uninitialized_var(pteval);
 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 	unsigned long sg_res;
+	unsigned int largepage_lvl = 0;
+	unsigned long lvl_pages = 0;
 
 	BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
 
@@ -1666,7 +1771,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
 	}
 
-	while (nr_pages--) {
+	while (nr_pages > 0) {
 		uint64_t tmp;
 
 		if (!sg_res) {
@@ -1674,11 +1779,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
 			sg->dma_length = sg->length;
 			pteval = page_to_phys(sg_page(sg)) | prot;
+			phys_pfn = pteval >> VTD_PAGE_SHIFT;
 		}
+
 		if (!pte) {
-			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
+			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
+
+			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
 			if (!pte)
 				return -ENOMEM;
+			/* It is large page*/
+			if (largepage_lvl > 1)
+				pteval |= DMA_PTE_LARGE_PAGE;
+			else
+				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
+
 		}
 		/* We don't need lock here, nobody else
 		 * touches the iova range
@@ -1694,16 +1809,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			}
 			WARN_ON(1);
 		}
+
+		lvl_pages = lvl_to_nr_pages(largepage_lvl);
+
+		BUG_ON(nr_pages < lvl_pages);
+		BUG_ON(sg_res < lvl_pages);
+
+		nr_pages -= lvl_pages;
+		iov_pfn += lvl_pages;
+		phys_pfn += lvl_pages;
+		pteval += lvl_pages * VTD_PAGE_SIZE;
+		sg_res -= lvl_pages;
+
+		/* If the next PTE would be the first in a new page, then we
+		   need to flush the cache on the entries we've just written.
+		   And then we'll need to recalculate 'pte', so clear it and
+		   let it get set again in the if (!pte) block above.
+
+		   If we're done (!nr_pages) we need to flush the cache too.
+
+		   Also if we've been setting superpages, we may need to
+		   recalculate 'pte' and switch back to smaller pages for the
+		   end of the mapping, if the trailing size is not enough to
+		   use another superpage (i.e. sg_res < lvl_pages). */
 		pte++;
-		if (!nr_pages || first_pte_in_page(pte)) {
+		if (!nr_pages || first_pte_in_page(pte) ||
+		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
 			domain_flush_cache(domain, first_pte,
 					   (void *)pte - (void *)first_pte);
 			pte = NULL;
 		}
-		iov_pfn++;
-		pteval += VTD_PAGE_SIZE;
-		sg_res--;
-		if (!sg_res)
+
+		if (!sg_res && nr_pages)
 			sg = sg_next(sg);
 	}
 	return 0;
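
A worked example of the bookkeeping above, assuming superpage support and a 2MiB-aligned mapping: for a 3MiB scatterlist entry, sg_res starts at 768 pages, so hardware_largepage_caps() returns level 2 and the first pass writes one 2MiB PTE with lvl_pages = 512. That leaves sg_res = 256, which trips the new '(largepage_lvl > 1 && sg_res < lvl_pages)' flush condition, pte is recalculated, and the remaining 1MiB is mapped with 256 ordinary 4KiB PTEs.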
@@ -1835,7 +1972,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 
 	ret = iommu_attach_domain(domain, iommu);
 	if (ret) {
-		domain_exit(domain);
+		free_domain_mem(domain);
 		goto error;
 	}
 
@@ -2011,7 +2148,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
 	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
 		return 0;
 	return iommu_prepare_identity_map(pdev, rmrr->base_address,
-		rmrr->end_address + 1);
+		rmrr->end_address);
 }
 
 #ifdef CONFIG_DMAR_FLOPPY_WA
@@ -2025,7 +2162,7 @@ static inline void iommu_prepare_isa(void)
 		return;
 
 	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
-	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
+	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
 
 	if (ret)
 		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
@@ -2101,10 +2238,10 @@ static int identity_mapping(struct pci_dev *pdev)
 	if (likely(!iommu_identity_mapping))
 		return 0;
 
+	info = pdev->dev.archdata.iommu;
+	if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
+		return (info->domain == si_domain);
 
-	list_for_each_entry(info, &si_domain->devices, link)
-		if (info->dev == pdev)
-			return 1;
 	return 0;
 }
 
@@ -2182,8 +2319,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
 	 * Assume that they will -- if they turn out not to be, then we can
 	 * take them out of the 1:1 domain later.
 	 */
-	if (!startup)
-		return pdev->dma_mask > DMA_BIT_MASK(32);
+	if (!startup) {
+		/*
+		 * If the device's dma_mask is less than the system's memory
+		 * size then this is not a candidate for identity mapping.
+		 */
+		u64 dma_mask = pdev->dma_mask;
+
+		if (pdev->dev.coherent_dma_mask &&
+		    pdev->dev.coherent_dma_mask < dma_mask)
+			dma_mask = pdev->dev.coherent_dma_mask;
+
+		return dma_mask >= dma_get_required_mask(&pdev->dev);
+	}
 
 	return 1;
 }
@@ -2198,6 +2346,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
 		return -EFAULT;
 
 	for_each_pci_dev(pdev) {
+		/* Skip Host/PCI Bridge devices */
+		if (IS_BRIDGE_HOST_DEVICE(pdev))
+			continue;
 		if (iommu_should_identity_map(pdev, 1)) {
 			printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
 			       hw ? "hardware" : "software", pci_name(pdev));
@@ -2213,7 +2364,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
 	return 0;
 }
 
-int __init init_dmars(void)
+static int __init init_dmars(void)
 {
 	struct dmar_drhd_unit *drhd;
 	struct dmar_rmrr_unit *rmrr;
@@ -2265,7 +2416,7 @@ int __init init_dmars(void)
 		/*
 		 * TBD:
 		 * we could share the same root & context tables
-		 * amoung all IOMMU's. Need to Split it later.
+		 * among all IOMMU's. Need to Split it later.
 		 */
 		ret = iommu_alloc_root_entry(iommu);
 		if (ret) {
@@ -2393,8 +2544,15 @@ int __init init_dmars(void)
 	 * enable translation
 	 */
 	for_each_drhd_unit(drhd) {
-		if (drhd->ignored)
+		if (drhd->ignored) {
+			/*
+			 * we always have to disable PMRs or DMA may fail on
+			 * this device
+			 */
+			if (force_on)
+				iommu_disable_protect_mem_regions(drhd->iommu);
 			continue;
+		}
 		iommu = drhd->iommu;
 
 		iommu_flush_write_buffer(iommu);
@@ -2580,8 +2738,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 	iommu = domain_get_iommu(domain);
 	size = aligned_nrpages(paddr, size);
 
-	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
-				pdev->dma_mask);
+	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
 	if (!iova)
 		goto error;
 
@@ -3106,7 +3263,17 @@ static int init_iommu_hw(void)
 		if (iommu->qi)
 			dmar_reenable_qi(iommu);
 
-	for_each_active_iommu(iommu, drhd) {
+	for_each_iommu(iommu, drhd) {
+		if (drhd->ignored) {
+			/*
+			 * we always have to disable PMRs or DMA may fail on
+			 * this device
+			 */
+			if (force_on)
+				iommu_disable_protect_mem_regions(iommu);
+			continue;
+		}
+
 		iommu_flush_write_buffer(iommu);
 
 		iommu_set_root_entry(iommu);
@@ -3115,7 +3282,8 @@ static int init_iommu_hw(void)
 					   DMA_CCMD_GLOBAL_INVL);
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
 					 DMA_TLB_GLOBAL_FLUSH);
-		iommu_enable_translation(iommu);
+		if (iommu_enable_translation(iommu))
+			return 1;
 		iommu_disable_protect_mem_regions(iommu);
 	}
 
@@ -3135,7 +3303,7 @@ static void iommu_flush_all(void)
 	}
 }
 
-static int iommu_suspend(struct sys_device *dev, pm_message_t state)
+static int iommu_suspend(void)
 {
 	struct dmar_drhd_unit *drhd;
 	struct intel_iommu *iommu = NULL;
@@ -3175,15 +3343,18 @@ nomem:
 	return -ENOMEM;
 }
 
-static int iommu_resume(struct sys_device *dev)
+static void iommu_resume(void)
 {
 	struct dmar_drhd_unit *drhd;
 	struct intel_iommu *iommu = NULL;
 	unsigned long flag;
 
 	if (init_iommu_hw()) {
-		WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
-		return -EIO;
+		if (force_on)
+			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
+		else
+			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
+		return;
 	}
 
 	for_each_active_iommu(iommu, drhd) {
@@ -3204,40 +3375,20 @@ static int iommu_resume(struct sys_device *dev)
 
 	for_each_active_iommu(iommu, drhd)
 		kfree(iommu->iommu_state);
-
-	return 0;
 }
 
-static struct sysdev_class iommu_sysclass = {
-	.name = "iommu",
+static struct syscore_ops iommu_syscore_ops = {
 	.resume = iommu_resume,
 	.suspend = iommu_suspend,
 };
 
-static struct sys_device device_iommu = {
-	.cls = &iommu_sysclass,
-};
-
-static int __init init_iommu_sysfs(void)
+static void __init init_iommu_pm_ops(void)
 {
-	int error;
-
-	error = sysdev_class_register(&iommu_sysclass);
-	if (error)
-		return error;
-
-	error = sysdev_register(&device_iommu);
-	if (error)
-		sysdev_class_unregister(&iommu_sysclass);
-
-	return error;
+	register_syscore_ops(&iommu_syscore_ops);
 }
 
 #else
-static int __init init_iommu_sysfs(void)
-{
-	return 0;
-}
+static inline void init_iommu_pm_ops(void) {}
 #endif /* CONFIG_PM */
 
 /*
@@ -3260,9 +3411,15 @@ static int device_notifier(struct notifier_block *nb,
 	if (!domain)
 		return 0;
 
-	if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through)
+	if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
 		domain_remove_one_dev_info(domain, pdev);
 
+		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
+		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
+		    list_empty(&domain->devices))
+			domain_exit(domain);
+	}
+
 	return 0;
 }
 
@@ -3273,7 +3430,6 @@ static struct notifier_block device_nb = {
 int __init intel_iommu_init(void)
 {
 	int ret = 0;
-	int force_on = 0;
 
 	/* VT-d is required for a TXT/tboot launch, so enforce that */
 	force_on = tboot_force_iommu();
@@ -3297,8 +3453,17 @@ int __init intel_iommu_init(void)
 	if (no_iommu || dmar_disabled)
 		return -ENODEV;
 
-	iommu_init_mempool();
-	dmar_init_reserved_ranges();
+	if (iommu_init_mempool()) {
+		if (force_on)
+			panic("tboot: Failed to initialize iommu memory\n");
+		return -ENODEV;
+	}
+
+	if (dmar_init_reserved_ranges()) {
+		if (force_on)
+			panic("tboot: Failed to reserve iommu ranges\n");
+		return -ENODEV;
+	}
 
 	init_no_remapping_devices();
 
@@ -3320,7 +3485,7 @@ int __init intel_iommu_init(void)
 #endif
 	dma_ops = &intel_dma_ops;
 
-	init_iommu_sysfs();
+	init_iommu_pm_ops();
 
 	register_iommu(&intel_iommu_ops);
 
@@ -3373,8 +3538,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
 	spin_lock_irqsave(&device_domain_lock, flags);
 	list_for_each_safe(entry, tmp, &domain->devices) {
 		info = list_entry(entry, struct device_domain_info, link);
-		/* No need to compare PCI domain; it has to be the same */
-		if (info->bus == pdev->bus->number &&
+		if (info->segment == pci_domain_nr(pdev->bus) &&
+		    info->bus == pdev->bus->number &&
 		    info->devfn == pdev->devfn) {
 			list_del(&info->link);
 			list_del(&info->global);
@@ -3411,6 +3576,14 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
 		domain->iommu_count--;
 		domain_update_iommu_cap(domain);
 		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
+
+		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
+		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
+			spin_lock_irqsave(&iommu->lock, tmp_flags);
+			clear_bit(domain->id, iommu->domain_ids);
+			iommu->domains[domain->id] = NULL;
+			spin_unlock_irqrestore(&iommu->lock, tmp_flags);
+		}
 	}
 
 	spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -3493,6 +3666,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
 	domain->iommu_count = 0;
 	domain->iommu_coherency = 0;
 	domain->iommu_snooping = 0;
+	domain->iommu_superpage = 0;
 	domain->max_addr = 0;
 	domain->nid = -1;
 
@@ -3627,9 +3801,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 
 		pte = dmar_domain->pgd;
 		if (dma_pte_present(pte)) {
-			free_pgtable_page(dmar_domain->pgd);
 			dmar_domain->pgd = (struct dma_pte *)
 				phys_to_virt(dma_pte_addr(pte));
+			free_pgtable_page(pte);
 		}
 		dmar_domain->agaw--;
 	}
@@ -3708,7 +3882,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 	struct dma_pte *pte;
 	u64 phys = 0;
 
-	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
+	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
 	if (pte)
 		phys = dma_pte_addr(pte);
 