author		Fenghua Yu <fenghua.yu@intel.com>	2009-04-24 20:30:20 -0400
committer	David Woodhouse <David.Woodhouse@intel.com>	2009-04-29 01:54:34 -0400
commit		4ed0d3e6c64cfd9ba4ceb2099b10d1cf8ece4320 (patch)
tree		950bacfaf57040aafbcc2ea9b52eb171d35c23bd
parent		091069740304c979f957ceacec39c461d0192158 (diff)
Intel IOMMU Pass Through Support
The patch adds the kernel parameter intel_iommu=pt to set up pass through
mode in the context mapping entry. This disables DMAR in the Linux kernel,
but KVM still runs on VT-d and interrupt remapping still works.

In this mode, the kernel uses swiotlb for the DMA API functions, while the
other VT-d functionality remains enabled for KVM. KVM always uses
multi-level translation page tables in VT-d. By default, pass through mode
is disabled in the kernel.

This is useful when people don't want to enable VT-d DMAR in the kernel
but still want to use KVM and interrupt remapping, for example because of
DMAR performance concerns or for debugging.
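
As the hunks below show, the flag is parsed by iommu_setup() as part of
the iommu= boot option ("pt	[x86, IA64]" in kernel-parameters.txt). An
illustrative boot entry, with kernel image and root device as placeholders:

	kernel /boot/vmlinuz root=/dev/sda1 ro iommu=pt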
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Weidong Han <weidong@intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
-rw-r--r--	Documentation/kernel-parameters.txt	1
-rw-r--r--	arch/ia64/include/asm/iommu.h		1
-rw-r--r--	arch/ia64/kernel/pci-swiotlb.c		2
-rw-r--r--	arch/x86/include/asm/iommu.h		1
-rw-r--r--	arch/x86/kernel/pci-dma.c		6
-rw-r--r--	arch/x86/kernel/pci-swiotlb.c		3
-rw-r--r--	drivers/pci/dmar.c			11
-rw-r--r--	drivers/pci/intel-iommu.c		180
-rw-r--r--	include/linux/dma_remapping.h		8
-rw-r--r--	include/linux/intel-iommu.h		2
10 files changed, 165 insertions(+), 50 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 600cdd72900c..fa4faeb7597f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -965,6 +965,7 @@ and is between 256 and 4096 characters. It is defined in the file
 		nomerge
 		forcesac
 		soft
+		pt		[x86, IA64]
 
 	io7=		[HW] IO7 for Marvel based alpha systems
 			See comment before marvel_specify_io7 in
diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
index 0490794fe4aa..37d41ca5645a 100644
--- a/arch/ia64/include/asm/iommu.h
+++ b/arch/ia64/include/asm/iommu.h
@@ -9,6 +9,7 @@ extern void pci_iommu_shutdown(void);
 extern void no_iommu_init(void);
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
+extern int iommu_pass_through;
 extern void iommu_dma_init(void);
 extern void machvec_init(const char *name);
 
diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
index 285aae8431c6..223abb134105 100644
--- a/arch/ia64/kernel/pci-swiotlb.c
+++ b/arch/ia64/kernel/pci-swiotlb.c
@@ -46,7 +46,7 @@ void __init swiotlb_dma_init(void)
 
 void __init pci_swiotlb_init(void)
 {
-	if (!iommu_detected) {
+	if (!iommu_detected || iommu_pass_through) {
 #ifdef CONFIG_IA64_GENERIC
 		swiotlb = 1;
 		printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index af326a2975b5..fd6d21bbee6c 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -6,6 +6,7 @@ extern void no_iommu_init(void);
 extern struct dma_map_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
+extern int iommu_pass_through;
 
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 745579bc8256..8cad0d854242 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -160,6 +160,8 @@ again:
 	return page_address(page);
 }
 
+extern int iommu_pass_through;
+
 /*
  * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
  * documentation.
@@ -209,6 +211,10 @@ static __init int iommu_setup(char *p)
 #ifdef CONFIG_SWIOTLB
 		if (!strncmp(p, "soft", 4))
 			swiotlb = 1;
+		if (!strncmp(p, "pt", 2)) {
+			iommu_pass_through = 1;
+			return 1;
+		}
 #endif
 
 		gart_parse_options(p);
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 221a3853e268..3a0c51e0ba6d 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -71,7 +71,8 @@ void __init pci_swiotlb_init(void)
 {
 	/* don't initialize swiotlb if iommu=off (no_iommu=1) */
 #ifdef CONFIG_X86_64
-	if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)
+	if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) ||
+	    iommu_pass_through)
 		swiotlb = 1;
 #endif
 	if (swiotlb_force)
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index fa3a11365ec3..d3d86b749eee 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -515,6 +515,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	u32 ver;
 	static int iommu_allocated = 0;
 	int agaw = 0;
+	int msagaw = 0;
 
 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
@@ -535,12 +536,20 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	agaw = iommu_calculate_agaw(iommu);
 	if (agaw < 0) {
 		printk(KERN_ERR
 			"Cannot get a valid agaw for iommu (seq_id = %d)\n",
+			iommu->seq_id);
+		goto error;
+	}
+	msagaw = iommu_calculate_max_sagaw(iommu);
+	if (msagaw < 0) {
+		printk(KERN_ERR
+			"Cannot get a valid max agaw for iommu (seq_id = %d)\n",
 			iommu->seq_id);
 		goto error;
 	}
 #endif
 	iommu->agaw = agaw;
+	iommu->msagaw = msagaw;
 
 	/* the registers might be more than one page */
 	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 001b328adf80..13121821db7f 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -53,6 +53,8 @@
 
 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
 
+#define MAX_AGAW_WIDTH 64
+
 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
 
 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
@@ -127,8 +129,6 @@ static inline void context_set_fault_enable(struct context_entry *context)
 	context->lo &= (((u64)-1) << 2) | 1;
 }
 
-#define CONTEXT_TT_MULTI_LEVEL 0
-
 static inline void context_set_translation_type(struct context_entry *context,
 						unsigned long value)
 {
@@ -288,6 +288,7 @@ int dmar_disabled = 1;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int intel_iommu_strict;
+int iommu_pass_through;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -397,17 +398,13 @@ void free_iova_mem(struct iova *iova)
 
 static inline int width_to_agaw(int width);
 
-/* calculate agaw for each iommu.
- * "SAGAW" may be different across iommus, use a default agaw, and
- * get a supported less agaw for iommus that don't support the default agaw.
- */
-int iommu_calculate_agaw(struct intel_iommu *iommu)
+static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
 {
 	unsigned long sagaw;
 	int agaw = -1;
 
 	sagaw = cap_sagaw(iommu->cap);
-	for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+	for (agaw = width_to_agaw(max_gaw);
 	     agaw >= 0; agaw--) {
 		if (test_bit(agaw, &sagaw))
 			break;
@@ -416,6 +413,24 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
 	return agaw;
 }
 
+/*
+ * Calculate max SAGAW for each iommu.
+ */
+int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
+{
+	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
+}
+
+/*
+ * calculate agaw for each iommu.
+ * "SAGAW" may be different across iommus, use a default agaw, and
+ * get a supported less agaw for iommus that don't support the default agaw.
+ */
+int iommu_calculate_agaw(struct intel_iommu *iommu)
+{
+	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+}
+
 /* in native case, each domain is related to only one iommu */
 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 {
@@ -1321,8 +1336,8 @@ static void domain_exit(struct dmar_domain *domain)
 	free_domain_mem(domain);
 }
 
-static int domain_context_mapping_one(struct dmar_domain *domain,
-				      int segment, u8 bus, u8 devfn)
+static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
+				      u8 bus, u8 devfn, int translation)
 {
 	struct context_entry *context;
 	unsigned long flags;
@@ -1335,7 +1350,10 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 
 	pr_debug("Set context mapping for %02x:%02x.%d\n",
 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
 	BUG_ON(!domain->pgd);
+	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
+	       translation != CONTEXT_TT_MULTI_LEVEL);
 
 	iommu = device_to_iommu(segment, bus, devfn);
 	if (!iommu)
@@ -1395,9 +1413,18 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	}
 
 	context_set_domain_id(context, id);
-	context_set_address_width(context, iommu->agaw);
-	context_set_address_root(context, virt_to_phys(pgd));
-	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
+
+	/*
+	 * In pass through mode, AW must be programmed to indicate the largest
+	 * AGAW value supported by hardware. And ASR is ignored by hardware.
+	 */
+	if (likely(translation == CONTEXT_TT_MULTI_LEVEL)) {
+		context_set_address_width(context, iommu->agaw);
+		context_set_address_root(context, virt_to_phys(pgd));
+	} else
+		context_set_address_width(context, iommu->msagaw);
+
+	context_set_translation_type(context, translation);
 	context_set_fault_enable(context);
 	context_set_present(context);
 	domain_flush_cache(domain, context, sizeof(*context));
@@ -1422,13 +1449,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 }
 
 static int
-domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
+domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
+		       int translation)
 {
 	int ret;
 	struct pci_dev *tmp, *parent;
 
 	ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
-					 pdev->bus->number, pdev->devfn);
+					 pdev->bus->number, pdev->devfn,
+					 translation);
 	if (ret)
 		return ret;
 
@@ -1442,7 +1471,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
 		ret = domain_context_mapping_one(domain,
 						 pci_domain_nr(parent->bus),
 						 parent->bus->number,
-						 parent->devfn);
+						 parent->devfn, translation);
 		if (ret)
 			return ret;
 		parent = parent->bus->self;
@@ -1450,12 +1479,14 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
 	if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
 		return domain_context_mapping_one(domain,
 					pci_domain_nr(tmp->subordinate),
-					tmp->subordinate->number, 0);
+					tmp->subordinate->number, 0,
+					translation);
 	else /* this is a legacy PCI bridge */
 		return domain_context_mapping_one(domain,
 					pci_domain_nr(tmp->bus),
 					tmp->bus->number,
-					tmp->devfn);
+					tmp->devfn,
+					translation);
 }
 
 static int domain_context_mapped(struct pci_dev *pdev)
@@ -1752,7 +1783,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
 		goto error;
 
 	/* context entry init */
-	ret = domain_context_mapping(domain, pdev);
+	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
 	if (!ret)
 		return 0;
 error:
@@ -1853,6 +1884,23 @@ static inline void iommu_prepare_isa(void)
 }
 #endif /* !CONFIG_DMAR_FLPY_WA */
 
+/* Initialize each context entry as pass through.*/
+static int __init init_context_pass_through(void)
+{
+	struct pci_dev *pdev = NULL;
+	struct dmar_domain *domain;
+	int ret;
+
+	for_each_pci_dev(pdev) {
+		domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+		ret = domain_context_mapping(domain, pdev,
+					     CONTEXT_TT_PASS_THROUGH);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
 static int __init init_dmars(void)
 {
 	struct dmar_drhd_unit *drhd;
@@ -1860,6 +1908,7 @@ static int __init init_dmars(void)
 	struct pci_dev *pdev;
 	struct intel_iommu *iommu;
 	int i, ret;
+	int pass_through = 1;
 
 	/*
 	 * for each drhd
@@ -1913,7 +1962,15 @@ static int __init init_dmars(void)
 			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
 			goto error;
 		}
+		if (!ecap_pass_through(iommu->ecap))
+			pass_through = 0;
 	}
+	if (iommu_pass_through)
+		if (!pass_through) {
+			printk(KERN_INFO
+			       "Pass Through is not supported by hardware.\n");
+			iommu_pass_through = 0;
+		}
 
 	/*
 	 * Start from the sane iommu hardware state.
@@ -1976,37 +2033,57 @@ static int __init init_dmars(void)
 			       "IOMMU: enable interrupt remapping failed\n");
 	}
 #endif
+	/*
+	 * If pass through is set and enabled, context entries of all pci
+	 * devices are initialized by pass through translation type.
+	 */
+	if (iommu_pass_through) {
+		ret = init_context_pass_through();
+		if (ret) {
+			printk(KERN_ERR "IOMMU: Pass through init failed.\n");
+			iommu_pass_through = 0;
+		}
+	}
 
 	/*
-	 * For each rmrr
-	 *   for each dev attached to rmrr
-	 *   do
-	 *     locate drhd for dev, alloc domain for dev
-	 *     allocate free domain
-	 *     allocate page table entries for rmrr
-	 *     if context not allocated for bus
-	 *           allocate and init context
-	 *           set present in root table for this bus
-	 *     init context with domain, translation etc
-	 *    endfor
-	 * endfor
+	 * If pass through is not set or not enabled, setup context entries
+	 * for identity mappings for rmrr, gfx, and isa.
 	 */
-	for_each_rmrr_units(rmrr) {
-		for (i = 0; i < rmrr->devices_cnt; i++) {
-			pdev = rmrr->devices[i];
-			/* some BIOS lists non-exist devices in DMAR table */
-			if (!pdev)
-				continue;
-			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
-			if (ret)
-				printk(KERN_ERR
+	if (!iommu_pass_through) {
+		/*
+		 * For each rmrr
+		 *   for each dev attached to rmrr
+		 *   do
+		 *     locate drhd for dev, alloc domain for dev
+		 *     allocate free domain
+		 *     allocate page table entries for rmrr
+		 *     if context not allocated for bus
+		 *           allocate and init context
+		 *           set present in root table for this bus
+		 *     init context with domain, translation etc
+		 *    endfor
+		 * endfor
+		 */
+		for_each_rmrr_units(rmrr) {
+			for (i = 0; i < rmrr->devices_cnt; i++) {
+				pdev = rmrr->devices[i];
+				/*
+				 * some BIOS lists non-exist devices in DMAR
+				 * table.
+				 */
+				if (!pdev)
+					continue;
+				ret = iommu_prepare_rmrr_dev(rmrr, pdev);
+				if (ret)
+					printk(KERN_ERR
 				"IOMMU: mapping reserved region failed\n");
+			}
 		}
-	}
 
-	iommu_prepare_gfx_mapping();
+		iommu_prepare_gfx_mapping();
 
-	iommu_prepare_isa();
+		iommu_prepare_isa();
+	}
 
 	/*
 	 * for each drhd
@@ -2117,7 +2194,8 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
 
 	/* make sure context mapping is ok */
 	if (unlikely(!domain_context_mapped(pdev))) {
-		ret = domain_context_mapping(domain, pdev);
+		ret = domain_context_mapping(domain, pdev,
+					     CONTEXT_TT_MULTI_LEVEL);
 		if (ret) {
 			printk(KERN_ERR
 				"Domain context map for %s failed",
@@ -2786,7 +2864,7 @@ int __init intel_iommu_init(void)
 	 * Check the need for DMA-remapping initialization now.
 	 * Above initialization will also be used by Interrupt-remapping.
 	 */
-	if (no_iommu || swiotlb || dmar_disabled)
+	if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled)
 		return -ENODEV;
 
 	iommu_init_mempool();
@@ -2806,7 +2884,15 @@ int __init intel_iommu_init(void)
 
 	init_timer(&unmap_timer);
 	force_iommu = 1;
-	dma_ops = &intel_dma_ops;
+
+	if (!iommu_pass_through) {
+		printk(KERN_INFO
+		       "Multi-level page-table translation for DMAR.\n");
+		dma_ops = &intel_dma_ops;
+	} else
+		printk(KERN_INFO
+		       "DMAR: Pass through translation for DMAR.\n");
+
 	init_iommu_sysfs();
 
 	register_iommu(&intel_iommu_ops);
@@ -3146,7 +3232,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 		return -EFAULT;
 	}
 
-	ret = domain_context_mapping(dmar_domain, pdev);
+	ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
 	if (ret)
 		return ret;
 
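The core of the intel-iommu.c change is the context-entry programming rule
in domain_context_mapping_one() above. A condensed, illustrative sketch of
that rule (not a drop-in replacement for the kernel code; names follow the
hunk above):

	/*
	 * Multi-level translation programs both the address width (AW) and
	 * the address space root (ASR) of the context entry to describe the
	 * I/O page table.  Pass through programs only AW, which must
	 * advertise the largest AGAW the hardware supports (iommu->msagaw);
	 * hardware ignores ASR in that mode.
	 */
	if (translation == CONTEXT_TT_MULTI_LEVEL) {
		context_set_address_width(context, iommu->agaw);
		context_set_address_root(context, virt_to_phys(pgd));
	} else {
		/* CONTEXT_TT_PASS_THROUGH */
		context_set_address_width(context, iommu->msagaw);
	}
	context_set_translation_type(context, translation);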
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 1a455f1f86d7..e0a03aff63d9 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -13,6 +13,9 @@
 #define DMA_PTE_WRITE (2)
 #define DMA_PTE_SNP (1 << 11)
 
+#define CONTEXT_TT_MULTI_LEVEL	0
+#define CONTEXT_TT_PASS_THROUGH 2
+
 struct intel_iommu;
 struct dmar_domain;
 struct root_entry;
@@ -21,11 +24,16 @@ extern void free_dmar_iommu(struct intel_iommu *iommu);
 
 #ifdef CONFIG_DMAR
 extern int iommu_calculate_agaw(struct intel_iommu *iommu);
+extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
 #else
 static inline int iommu_calculate_agaw(struct intel_iommu *iommu)
 {
 	return 0;
 }
+static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
+{
+	return 0;
+}
 #endif
 
 extern int dmar_disabled;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index aa8c53171233..7246971a7feb 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -120,6 +120,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 	(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
 #define ecap_coherent(e)	((e) & 0x1)
 #define ecap_qis(e)		((e) & 0x2)
+#define ecap_pass_through(e)	((e >> 6) & 0x1)
 #define ecap_eim_support(e)	((e >> 4) & 0x1)
 #define ecap_ir_support(e)	((e >> 3) & 0x1)
 #define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
@@ -302,6 +303,7 @@ struct intel_iommu {
 	spinlock_t	register_lock; /* protect register handling */
 	int		seq_id;	/* sequence id of the iommu */
 	int		agaw; /* agaw of this iommu */
+	int		msagaw; /* max sagaw of this iommu */
 	unsigned int	irq;
 	unsigned char	name[13];    /* Device Name */
 