author     Fenghua Yu <fenghua.yu@intel.com>             2009-04-24 20:30:20 -0400
committer  David Woodhouse <David.Woodhouse@intel.com>   2009-04-29 01:54:34 -0400
commit     4ed0d3e6c64cfd9ba4ceb2099b10d1cf8ece4320 (patch)
tree       950bacfaf57040aafbcc2ea9b52eb171d35c23bd /drivers/pci/intel-iommu.c
parent     091069740304c979f957ceacec39c461d0192158 (diff)
Intel IOMMU Pass Through Support
This patch adds the kernel parameter intel_iommu=pt to set up pass-through
mode in the context mapping entries. This disables DMAR in the Linux kernel,
but KVM still runs on VT-d and interrupt remapping still works.
In this mode, the kernel uses swiotlb for the DMA API functions, while the
other VT-d functionalities remain enabled for KVM. KVM always uses the
multi-level translation page table in VT-d. By default, pass-through mode is
disabled in the kernel.
This is useful when people do not want to enable VT-d DMAR in the kernel but
still want to use KVM and interrupt remapping, for example because of DMAR
performance concerns or for debugging purposes.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Weidong Han <weidong@intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
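A usage sketch (not part of the commit itself), assuming the option spelling
given in the changelog: pass-through mode is requested by appending the new
parameter to the kernel boot command line, for example in a hypothetical boot
loader entry such as

    kernel /vmlinuz ro root=/dev/sda1 intel_iommu=pt

Whether the mode actually took effect can then be read off the informational
messages added by this patch, e.g. "DMAR: Pass through translation for DMAR."
or, on hardware without the capability, "Pass Through is not supported by
hardware." in the boot log.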
Diffstat (limited to 'drivers/pci/intel-iommu.c')
-rw-r--r--   drivers/pci/intel-iommu.c   180
1 files changed, 133 insertions, 47 deletions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 001b328adf80..13121821db7f 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -53,6 +53,8 @@

 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

+#define MAX_AGAW_WIDTH 64
+
 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)

 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
@@ -127,8 +129,6 @@ static inline void context_set_fault_enable(struct context_entry *context)
         context->lo &= (((u64)-1) << 2) | 1;
 }

-#define CONTEXT_TT_MULTI_LEVEL 0
-
 static inline void context_set_translation_type(struct context_entry *context,
                                                 unsigned long value)
 {
@@ -288,6 +288,7 @@ int dmar_disabled = 1;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int intel_iommu_strict;
+int iommu_pass_through;

 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -397,17 +398,13 @@ void free_iova_mem(struct iova *iova)

 static inline int width_to_agaw(int width);

-/* calculate agaw for each iommu.
- * "SAGAW" may be different across iommus, use a default agaw, and
- * get a supported less agaw for iommus that don't support the default agaw.
- */
-int iommu_calculate_agaw(struct intel_iommu *iommu)
+static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
 {
         unsigned long sagaw;
         int agaw = -1;

         sagaw = cap_sagaw(iommu->cap);
-        for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+        for (agaw = width_to_agaw(max_gaw);
         agaw >= 0; agaw--) {
                 if (test_bit(agaw, &sagaw))
                         break;
@@ -416,6 +413,24 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
         return agaw;
 }

+/*
+ * Calculate max SAGAW for each iommu.
+ */
+int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
+{
+        return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
+}
+
+/*
+ * calculate agaw for each iommu.
+ * "SAGAW" may be different across iommus, use a default agaw, and
+ * get a supported less agaw for iommus that don't support the default agaw.
+ */
+int iommu_calculate_agaw(struct intel_iommu *iommu)
+{
+        return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+}
+
 /* in native case, each domain is related to only one iommu */
 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 {
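For context on the AGAW arithmetic the helpers above rely on: the selection
loop walks down from a target guest address width to the widest width the
hardware advertises in its SAGAW capability field. Below is a minimal
standalone sketch of that selection, assuming the usual VT-d encoding in
which AGAW value n corresponds to a (30 + 9*n)-bit width; it mirrors the
shape of __iommu_calculate_agaw() but is not the kernel's code.

/* Illustrative sketch only -- assumes width_to_agaw(width) == (width - 30) / 9,
 * i.e. AGAW value n covers a (30 + 9*n)-bit guest address width. */
#include <stdio.h>

static int width_to_agaw(int width)
{
        return (width - 30) / 9;
}

/* Walk down from the requested width to the widest AGAW set in 'sagaw'. */
static int pick_agaw(unsigned long sagaw, int max_gaw)
{
        int agaw;

        for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--)
                if (sagaw & (1UL << agaw))
                        return agaw;
        return -1;
}

int main(void)
{
        /* Hypothetical hardware advertising 3- and 4-level tables
         * (SAGAW bits 1 and 2 set). */
        unsigned long sagaw = (1UL << 1) | (1UL << 2);

        printf("agaw for the 48-bit default width: %d\n", pick_agaw(sagaw, 48));
        printf("agaw for the 64-bit maximum width: %d\n", pick_agaw(sagaw, 64));
        return 0;
}

With these assumptions, both iommu_calculate_agaw() (48-bit default) and
iommu_calculate_max_sagaw() (64-bit cap) resolve to AGAW 2 on such hardware;
the two entry points differ only in the width they start searching from.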
@@ -1321,8 +1336,8 @@ static void domain_exit(struct dmar_domain *domain)
         free_domain_mem(domain);
 }

-static int domain_context_mapping_one(struct dmar_domain *domain,
-                int segment, u8 bus, u8 devfn)
+static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
+                                      u8 bus, u8 devfn, int translation)
 {
         struct context_entry *context;
         unsigned long flags;
@@ -1335,7 +1350,10 @@ static int domain_context_mapping_one(struct dmar_domain *domain,

         pr_debug("Set context mapping for %02x:%02x.%d\n",
                  bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
         BUG_ON(!domain->pgd);
+        BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
+               translation != CONTEXT_TT_MULTI_LEVEL);

         iommu = device_to_iommu(segment, bus, devfn);
         if (!iommu)
@@ -1395,9 +1413,18 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
         }

         context_set_domain_id(context, id);
-        context_set_address_width(context, iommu->agaw);
-        context_set_address_root(context, virt_to_phys(pgd));
-        context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
+
+        /*
+         * In pass through mode, AW must be programmed to indicate the largest
+         * AGAW value supported by hardware. And ASR is ignored by hardware.
+         */
+        if (likely(translation == CONTEXT_TT_MULTI_LEVEL)) {
+                context_set_address_width(context, iommu->agaw);
+                context_set_address_root(context, virt_to_phys(pgd));
+        } else
+                context_set_address_width(context, iommu->msagaw);
+
+        context_set_translation_type(context, translation);
         context_set_fault_enable(context);
         context_set_present(context);
         domain_flush_cache(domain, context, sizeof(*context));
@@ -1422,13 +1449,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 }

 static int
-domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
+domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
+                       int translation)
 {
         int ret;
         struct pci_dev *tmp, *parent;

         ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
-                                         pdev->bus->number, pdev->devfn);
+                                         pdev->bus->number, pdev->devfn,
+                                         translation);
         if (ret)
                 return ret;

@@ -1442,7 +1471,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
                         ret = domain_context_mapping_one(domain,
                                                 pci_domain_nr(parent->bus),
                                                 parent->bus->number,
-                                                parent->devfn);
+                                                parent->devfn, translation);
                         if (ret)
                                 return ret;
                         parent = parent->bus->self;
@@ -1450,12 +1479,14 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
         if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
                 return domain_context_mapping_one(domain,
                                         pci_domain_nr(tmp->subordinate),
-                                        tmp->subordinate->number, 0);
+                                        tmp->subordinate->number, 0,
+                                        translation);
         else /* this is a legacy PCI bridge */
                 return domain_context_mapping_one(domain,
                                         pci_domain_nr(tmp->bus),
                                         tmp->bus->number,
-                                        tmp->devfn);
+                                        tmp->devfn,
+                                        translation);
 }

 static int domain_context_mapped(struct pci_dev *pdev)
@@ -1752,7 +1783,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
                 goto error;

         /* context entry init */
-        ret = domain_context_mapping(domain, pdev);
+        ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
         if (!ret)
                 return 0;
 error:
@@ -1853,6 +1884,23 @@ static inline void iommu_prepare_isa(void)
 }
 #endif /* !CONFIG_DMAR_FLPY_WA */

+/* Initialize each context entry as pass through.*/
+static int __init init_context_pass_through(void)
+{
+        struct pci_dev *pdev = NULL;
+        struct dmar_domain *domain;
+        int ret;
+
+        for_each_pci_dev(pdev) {
+                domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+                ret = domain_context_mapping(domain, pdev,
+                                             CONTEXT_TT_PASS_THROUGH);
+                if (ret)
+                        return ret;
+        }
+        return 0;
+}
+
 static int __init init_dmars(void)
 {
         struct dmar_drhd_unit *drhd;
@@ -1860,6 +1908,7 @@ static int __init init_dmars(void)
         struct pci_dev *pdev;
         struct intel_iommu *iommu;
         int i, ret;
+        int pass_through = 1;

         /*
          * for each drhd
@@ -1913,7 +1962,15 @@ static int __init init_dmars(void)
                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
                         goto error;
                 }
+                if (!ecap_pass_through(iommu->ecap))
+                        pass_through = 0;
         }
+        if (iommu_pass_through)
+                if (!pass_through) {
+                        printk(KERN_INFO
+                               "Pass Through is not supported by hardware.\n");
+                        iommu_pass_through = 0;
+                }

         /*
          * Start from the sane iommu hardware state.
@@ -1976,37 +2033,57 @@ static int __init init_dmars(void)
                                 "IOMMU: enable interrupt remapping failed\n");
         }
 #endif
+        /*
+         * If pass through is set and enabled, context entries of all pci
+         * devices are intialized by pass through translation type.
+         */
+        if (iommu_pass_through) {
+                ret = init_context_pass_through();
+                if (ret) {
+                        printk(KERN_ERR "IOMMU: Pass through init failed.\n");
+                        iommu_pass_through = 0;
+                }
+        }

         /*
-         * For each rmrr
-         *   for each dev attached to rmrr
-         *   do
-         *     locate drhd for dev, alloc domain for dev
-         *     allocate free domain
-         *     allocate page table entries for rmrr
-         *     if context not allocated for bus
-         *           allocate and init context
-         *           set present in root table for this bus
-         *     init context with domain, translation etc
-         *    endfor
-         * endfor
+         * If pass through is not set or not enabled, setup context entries for
+         * identity mappings for rmrr, gfx, and isa.
          */
-        for_each_rmrr_units(rmrr) {
-                for (i = 0; i < rmrr->devices_cnt; i++) {
-                        pdev = rmrr->devices[i];
-                        /* some BIOS lists non-exist devices in DMAR table */
-                        if (!pdev)
-                                continue;
-                        ret = iommu_prepare_rmrr_dev(rmrr, pdev);
-                        if (ret)
-                                printk(KERN_ERR
+        if (!iommu_pass_through) {
+                /*
+                 * For each rmrr
+                 *   for each dev attached to rmrr
+                 *   do
+                 *     locate drhd for dev, alloc domain for dev
+                 *     allocate free domain
+                 *     allocate page table entries for rmrr
+                 *     if context not allocated for bus
+                 *           allocate and init context
+                 *           set present in root table for this bus
+                 *     init context with domain, translation etc
+                 *    endfor
+                 * endfor
+                 */
+                for_each_rmrr_units(rmrr) {
+                        for (i = 0; i < rmrr->devices_cnt; i++) {
+                                pdev = rmrr->devices[i];
+                                /*
+                                 * some BIOS lists non-exist devices in DMAR
+                                 * table.
+                                 */
+                                if (!pdev)
+                                        continue;
+                                ret = iommu_prepare_rmrr_dev(rmrr, pdev);
+                                if (ret)
+                                        printk(KERN_ERR
                                 "IOMMU: mapping reserved region failed\n");
+                        }
                 }
-        }

         iommu_prepare_gfx_mapping();

         iommu_prepare_isa();
+        }

         /*
          * for each drhd
@@ -2117,7 +2194,8 @@ get_valid_domain_for_dev(struct pci_dev *pdev)

         /* make sure context mapping is ok */
         if (unlikely(!domain_context_mapped(pdev))) {
-                ret = domain_context_mapping(domain, pdev);
+                ret = domain_context_mapping(domain, pdev,
+                                             CONTEXT_TT_MULTI_LEVEL);
                 if (ret) {
                         printk(KERN_ERR
                                "Domain context map for %s failed",
@@ -2786,7 +2864,7 @@ int __init intel_iommu_init(void)
          * Check the need for DMA-remapping initialization now.
          * Above initialization will also be used by Interrupt-remapping.
          */
-        if (no_iommu || swiotlb || dmar_disabled)
+        if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled)
                 return -ENODEV;

         iommu_init_mempool();
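The only behavioural change in the hunk above is the added
"&& !iommu_pass_through": initialization is no longer skipped merely because
swiotlb is already in use, provided pass-through was requested. A small
standalone sketch (not kernel code) that enumerates the relevant cases:

/* Illustration of the reworked early-exit test; no_iommu and dmar_disabled
 * are held at 0 so that only the swiotlb / iommu_pass_through interaction
 * is visible. */
#include <stdio.h>

int main(void)
{
        int no_iommu = 0, dmar_disabled = 0;
        int swiotlb, iommu_pass_through;

        for (swiotlb = 0; swiotlb <= 1; swiotlb++)
                for (iommu_pass_through = 0; iommu_pass_through <= 1;
                     iommu_pass_through++) {
                        int bail = no_iommu ||
                                   (swiotlb && !iommu_pass_through) ||
                                   dmar_disabled;

                        printf("swiotlb=%d pass_through=%d -> %s\n",
                               swiotlb, iommu_pass_through,
                               bail ? "return -ENODEV" : "continue init");
                }
        return 0;
}

Only the swiotlb=1, pass_through=0 combination bails out; with pass-through
requested, intel_iommu_init() proceeds even though swiotlb keeps handling the
DMA API, which matches the changelog above.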
@@ -2806,7 +2884,15 @@ int __init intel_iommu_init(void)

         init_timer(&unmap_timer);
         force_iommu = 1;
-        dma_ops = &intel_dma_ops;
+
+        if (!iommu_pass_through) {
+                printk(KERN_INFO
+                       "Multi-level page-table translation for DMAR.\n");
+                dma_ops = &intel_dma_ops;
+        } else
+                printk(KERN_INFO
+                       "DMAR: Pass through translation for DMAR.\n");
+
         init_iommu_sysfs();

         register_iommu(&intel_iommu_ops);
@@ -3146,7 +3232,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
                 return -EFAULT;
         }

-        ret = domain_context_mapping(dmar_domain, pdev);
+        ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
         if (ret)
                 return ret;
