aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/pci/intel-iommu.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-06-23 00:38:22 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-06-23 00:38:22 -0400
commit 687d680985b1438360a9ba470ece8b57cd205c3b (patch)
tree ae253608531e5c3e823600974c610e722e7de759 /drivers/pci/intel-iommu.c
parent 1053414068bad659479e6efa62a67403b8b1ec0a (diff)
parent 008fe148cb0fb51d266baabe2c09997b21cf90c6 (diff)
Merge git://git.infradead.org/~dwmw2/iommu-2.6.31
* git://git.infradead.org/~dwmw2/iommu-2.6.31:
  intel-iommu: Fix one last ia64 build problem in Pass Through Support
  VT-d: support the device IOTLB
  VT-d: cleanup iommu_flush_iotlb_psi and flush_unmaps
  VT-d: add device IOTLB invalidation support
  VT-d: parse ATSR in DMA Remapping Reporting Structure
  PCI: handle Virtual Function ATS enabling
  PCI: support the ATS capability
  intel-iommu: dmar_set_interrupt return error value
  intel-iommu: Tidy up iommu->gcmd handling
  intel-iommu: Fix tiny theoretical race in write-buffer flush.
  intel-iommu: Clean up handling of "caching mode" vs. IOTLB flushing.
  intel-iommu: Clean up handling of "caching mode" vs. context flushing.
  VT-d: fix invalid domain id for KVM context flush
  Fix !CONFIG_DMAR build failure introduced by Intel IOMMU Pass Through Support
  Intel IOMMU Pass Through Support

Fix up trivial conflicts in drivers/pci/{intel-iommu.c,intr_remapping.c}
Diffstat (limited to 'drivers/pci/intel-iommu.c')
-rw-r--r-- drivers/pci/intel-iommu.c 449
1 files changed, 296 insertions, 153 deletions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index cd389162735..178853a0744 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -53,6 +53,8 @@
53 53
54#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 54#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
55 55
56#define MAX_AGAW_WIDTH 64
57
56#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 58#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
57 59
58#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 60#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
@@ -131,8 +133,6 @@ static inline void context_set_fault_enable(struct context_entry *context)
131 context->lo &= (((u64)-1) << 2) | 1; 133 context->lo &= (((u64)-1) << 2) | 1;
132} 134}
133 135
134#define CONTEXT_TT_MULTI_LEVEL 0
135
136static inline void context_set_translation_type(struct context_entry *context, 136static inline void context_set_translation_type(struct context_entry *context,
137 unsigned long value) 137 unsigned long value)
138{ 138{
@@ -256,6 +256,7 @@ struct device_domain_info {
256 u8 bus; /* PCI bus number */ 256 u8 bus; /* PCI bus number */
257 u8 devfn; /* PCI devfn number */ 257 u8 devfn; /* PCI devfn number */
258 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ 258 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
259 struct intel_iommu *iommu; /* IOMMU used by this device */
259 struct dmar_domain *domain; /* pointer to domain */ 260 struct dmar_domain *domain; /* pointer to domain */
260}; 261};
261 262
@@ -401,17 +402,13 @@ void free_iova_mem(struct iova *iova)
401 402
402static inline int width_to_agaw(int width); 403static inline int width_to_agaw(int width);
403 404
404/* calculate agaw for each iommu. 405static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
405 * "SAGAW" may be different across iommus, use a default agaw, and
406 * get a supported less agaw for iommus that don't support the default agaw.
407 */
408int iommu_calculate_agaw(struct intel_iommu *iommu)
409{ 406{
410 unsigned long sagaw; 407 unsigned long sagaw;
411 int agaw = -1; 408 int agaw = -1;
412 409
413 sagaw = cap_sagaw(iommu->cap); 410 sagaw = cap_sagaw(iommu->cap);
414 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); 411 for (agaw = width_to_agaw(max_gaw);
415 agaw >= 0; agaw--) { 412 agaw >= 0; agaw--) {
416 if (test_bit(agaw, &sagaw)) 413 if (test_bit(agaw, &sagaw))
417 break; 414 break;
@@ -420,6 +417,24 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
420 return agaw; 417 return agaw;
421} 418}
422 419
420/*
421 * Calculate max SAGAW for each iommu.
422 */
423int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
424{
425 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
426}
427
428/*
429 * calculate agaw for each iommu.
430 * "SAGAW" may be different across iommus, use a default agaw, and
431 * get a supported less agaw for iommus that don't support the default agaw.
432 */
433int iommu_calculate_agaw(struct intel_iommu *iommu)
434{
435 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
436}
437
423/* in native case, each domain is related to only one iommu */ 438/* in native case, each domain is related to only one iommu */
424static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) 439static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
425{ 440{
@@ -809,7 +824,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
809static void iommu_set_root_entry(struct intel_iommu *iommu) 824static void iommu_set_root_entry(struct intel_iommu *iommu)
810{ 825{
811 void *addr; 826 void *addr;
812 u32 cmd, sts; 827 u32 sts;
813 unsigned long flag; 828 unsigned long flag;
814 829
815 addr = iommu->root_entry; 830 addr = iommu->root_entry;
@@ -817,12 +832,11 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
817 spin_lock_irqsave(&iommu->register_lock, flag); 832 spin_lock_irqsave(&iommu->register_lock, flag);
818 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr)); 833 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
819 834
820 cmd = iommu->gcmd | DMA_GCMD_SRTP; 835 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
821 writel(cmd, iommu->reg + DMAR_GCMD_REG);
822 836
823 /* Make sure hardware complete it */ 837 /* Make sure hardware complete it */
824 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 838 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
825 readl, (sts & DMA_GSTS_RTPS), sts); 839 readl, (sts & DMA_GSTS_RTPS), sts);
826 840
827 spin_unlock_irqrestore(&iommu->register_lock, flag); 841 spin_unlock_irqrestore(&iommu->register_lock, flag);
828} 842}
@@ -834,39 +848,25 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu)
834 848
835 if (!rwbf_quirk && !cap_rwbf(iommu->cap)) 849 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
836 return; 850 return;
837 val = iommu->gcmd | DMA_GCMD_WBF;
838 851
839 spin_lock_irqsave(&iommu->register_lock, flag); 852 spin_lock_irqsave(&iommu->register_lock, flag);
840 writel(val, iommu->reg + DMAR_GCMD_REG); 853 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
841 854
842 /* Make sure hardware complete it */ 855 /* Make sure hardware complete it */
843 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 856 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
844 readl, (!(val & DMA_GSTS_WBFS)), val); 857 readl, (!(val & DMA_GSTS_WBFS)), val);
845 858
846 spin_unlock_irqrestore(&iommu->register_lock, flag); 859 spin_unlock_irqrestore(&iommu->register_lock, flag);
847} 860}
848 861
849/* return value determine if we need a write buffer flush */ 862/* return value determine if we need a write buffer flush */
850static int __iommu_flush_context(struct intel_iommu *iommu, 863static void __iommu_flush_context(struct intel_iommu *iommu,
851 u16 did, u16 source_id, u8 function_mask, u64 type, 864 u16 did, u16 source_id, u8 function_mask,
852 int non_present_entry_flush) 865 u64 type)
853{ 866{
854 u64 val = 0; 867 u64 val = 0;
855 unsigned long flag; 868 unsigned long flag;
856 869
857 /*
858 * In the non-present entry flush case, if hardware doesn't cache
859 * non-present entry we do nothing and if hardware cache non-present
860 * entry, we flush entries of domain 0 (the domain id is used to cache
861 * any non-present entries)
862 */
863 if (non_present_entry_flush) {
864 if (!cap_caching_mode(iommu->cap))
865 return 1;
866 else
867 did = 0;
868 }
869
870 switch (type) { 870 switch (type) {
871 case DMA_CCMD_GLOBAL_INVL: 871 case DMA_CCMD_GLOBAL_INVL:
872 val = DMA_CCMD_GLOBAL_INVL; 872 val = DMA_CCMD_GLOBAL_INVL;
@@ -891,33 +891,16 @@ static int __iommu_flush_context(struct intel_iommu *iommu,
891 dmar_readq, (!(val & DMA_CCMD_ICC)), val); 891 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
892 892
893 spin_unlock_irqrestore(&iommu->register_lock, flag); 893 spin_unlock_irqrestore(&iommu->register_lock, flag);
894
895 /* flush context entry will implicitly flush write buffer */
896 return 0;
897} 894}
898 895
899/* return value determine if we need a write buffer flush */ 896/* return value determine if we need a write buffer flush */
900static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, 897static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
901 u64 addr, unsigned int size_order, u64 type, 898 u64 addr, unsigned int size_order, u64 type)
902 int non_present_entry_flush)
903{ 899{
904 int tlb_offset = ecap_iotlb_offset(iommu->ecap); 900 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
905 u64 val = 0, val_iva = 0; 901 u64 val = 0, val_iva = 0;
906 unsigned long flag; 902 unsigned long flag;
907 903
908 /*
909 * In the non-present entry flush case, if hardware doesn't cache
910 * non-present entry we do nothing and if hardware cache non-present
911 * entry, we flush entries of domain 0 (the domain id is used to cache
912 * any non-present entries)
913 */
914 if (non_present_entry_flush) {
915 if (!cap_caching_mode(iommu->cap))
916 return 1;
917 else
918 did = 0;
919 }
920
921 switch (type) { 904 switch (type) {
922 case DMA_TLB_GLOBAL_FLUSH: 905 case DMA_TLB_GLOBAL_FLUSH:
923 /* global flush doesn't need set IVA_REG */ 906 /* global flush doesn't need set IVA_REG */
@@ -965,37 +948,101 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
965 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", 948 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
966 (unsigned long long)DMA_TLB_IIRG(type), 949 (unsigned long long)DMA_TLB_IIRG(type),
967 (unsigned long long)DMA_TLB_IAIG(val)); 950 (unsigned long long)DMA_TLB_IAIG(val));
968 /* flush iotlb entry will implicitly flush write buffer */
969 return 0;
970} 951}
971 952
972static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, 953static struct device_domain_info *iommu_support_dev_iotlb(
973 u64 addr, unsigned int pages, int non_present_entry_flush) 954 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
955{
956 int found = 0;
957 unsigned long flags;
958 struct device_domain_info *info;
959 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
960
961 if (!ecap_dev_iotlb_support(iommu->ecap))
962 return NULL;
963
964 if (!iommu->qi)
965 return NULL;
966
967 spin_lock_irqsave(&device_domain_lock, flags);
968 list_for_each_entry(info, &domain->devices, link)
969 if (info->bus == bus && info->devfn == devfn) {
970 found = 1;
971 break;
972 }
973 spin_unlock_irqrestore(&device_domain_lock, flags);
974
975 if (!found || !info->dev)
976 return NULL;
977
978 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
979 return NULL;
980
981 if (!dmar_find_matched_atsr_unit(info->dev))
982 return NULL;
983
984 info->iommu = iommu;
985
986 return info;
987}
988
989static void iommu_enable_dev_iotlb(struct device_domain_info *info)
974{ 990{
975 unsigned int mask; 991 if (!info)
992 return;
993
994 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
995}
996
997static void iommu_disable_dev_iotlb(struct device_domain_info *info)
998{
999 if (!info->dev || !pci_ats_enabled(info->dev))
1000 return;
1001
1002 pci_disable_ats(info->dev);
1003}
1004
1005static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1006 u64 addr, unsigned mask)
1007{
1008 u16 sid, qdep;
1009 unsigned long flags;
1010 struct device_domain_info *info;
1011
1012 spin_lock_irqsave(&device_domain_lock, flags);
1013 list_for_each_entry(info, &domain->devices, link) {
1014 if (!info->dev || !pci_ats_enabled(info->dev))
1015 continue;
1016
1017 sid = info->bus << 8 | info->devfn;
1018 qdep = pci_ats_queue_depth(info->dev);
1019 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1020 }
1021 spin_unlock_irqrestore(&device_domain_lock, flags);
1022}
1023
1024static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1025 u64 addr, unsigned int pages)
1026{
1027 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
976 1028
977 BUG_ON(addr & (~VTD_PAGE_MASK)); 1029 BUG_ON(addr & (~VTD_PAGE_MASK));
978 BUG_ON(pages == 0); 1030 BUG_ON(pages == 0);
979 1031
980 /* Fallback to domain selective flush if no PSI support */
981 if (!cap_pgsel_inv(iommu->cap))
982 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
983 DMA_TLB_DSI_FLUSH,
984 non_present_entry_flush);
985
986 /* 1032 /*
1033 * Fallback to domain selective flush if no PSI support or the size is
1034 * too big.
987 * PSI requires page size to be 2 ^ x, and the base address is naturally 1035 * PSI requires page size to be 2 ^ x, and the base address is naturally
988 * aligned to the size 1036 * aligned to the size
989 */ 1037 */
990 mask = ilog2(__roundup_pow_of_two(pages)); 1038 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
991 /* Fallback to domain selective flush if size is too big */ 1039 iommu->flush.flush_iotlb(iommu, did, 0, 0,
992 if (mask > cap_max_amask_val(iommu->cap)) 1040 DMA_TLB_DSI_FLUSH);
993 return iommu->flush.flush_iotlb(iommu, did, 0, 0, 1041 else
994 DMA_TLB_DSI_FLUSH, non_present_entry_flush); 1042 iommu->flush.flush_iotlb(iommu, did, addr, mask,
995 1043 DMA_TLB_PSI_FLUSH);
996 return iommu->flush.flush_iotlb(iommu, did, addr, mask, 1044 if (did)
997 DMA_TLB_PSI_FLUSH, 1045 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
998 non_present_entry_flush);
999} 1046}
1000 1047
1001static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) 1048static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
@@ -1021,13 +1068,13 @@ static int iommu_enable_translation(struct intel_iommu *iommu)
1021 unsigned long flags; 1068 unsigned long flags;
1022 1069
1023 spin_lock_irqsave(&iommu->register_lock, flags); 1070 spin_lock_irqsave(&iommu->register_lock, flags);
1024 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG); 1071 iommu->gcmd |= DMA_GCMD_TE;
1072 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1025 1073
1026 /* Make sure hardware complete it */ 1074 /* Make sure hardware complete it */
1027 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1075 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1028 readl, (sts & DMA_GSTS_TES), sts); 1076 readl, (sts & DMA_GSTS_TES), sts);
1029 1077
1030 iommu->gcmd |= DMA_GCMD_TE;
1031 spin_unlock_irqrestore(&iommu->register_lock, flags); 1078 spin_unlock_irqrestore(&iommu->register_lock, flags);
1032 return 0; 1079 return 0;
1033} 1080}
@@ -1043,7 +1090,7 @@ static int iommu_disable_translation(struct intel_iommu *iommu)
1043 1090
1044 /* Make sure hardware complete it */ 1091 /* Make sure hardware complete it */
1045 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1092 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1046 readl, (!(sts & DMA_GSTS_TES)), sts); 1093 readl, (!(sts & DMA_GSTS_TES)), sts);
1047 1094
1048 spin_unlock_irqrestore(&iommu->register_lock, flag); 1095 spin_unlock_irqrestore(&iommu->register_lock, flag);
1049 return 0; 1096 return 0;
@@ -1325,8 +1372,8 @@ static void domain_exit(struct dmar_domain *domain)
1325 free_domain_mem(domain); 1372 free_domain_mem(domain);
1326} 1373}
1327 1374
1328static int domain_context_mapping_one(struct dmar_domain *domain, 1375static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1329 int segment, u8 bus, u8 devfn) 1376 u8 bus, u8 devfn, int translation)
1330{ 1377{
1331 struct context_entry *context; 1378 struct context_entry *context;
1332 unsigned long flags; 1379 unsigned long flags;
@@ -1336,10 +1383,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1336 unsigned long ndomains; 1383 unsigned long ndomains;
1337 int id; 1384 int id;
1338 int agaw; 1385 int agaw;
1386 struct device_domain_info *info = NULL;
1339 1387
1340 pr_debug("Set context mapping for %02x:%02x.%d\n", 1388 pr_debug("Set context mapping for %02x:%02x.%d\n",
1341 bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1389 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1390
1342 BUG_ON(!domain->pgd); 1391 BUG_ON(!domain->pgd);
1392 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1393 translation != CONTEXT_TT_MULTI_LEVEL);
1343 1394
1344 iommu = device_to_iommu(segment, bus, devfn); 1395 iommu = device_to_iommu(segment, bus, devfn);
1345 if (!iommu) 1396 if (!iommu)
@@ -1399,21 +1450,44 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1399 } 1450 }
1400 1451
1401 context_set_domain_id(context, id); 1452 context_set_domain_id(context, id);
1402 context_set_address_width(context, iommu->agaw); 1453
1403 context_set_address_root(context, virt_to_phys(pgd)); 1454 if (translation != CONTEXT_TT_PASS_THROUGH) {
1404 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL); 1455 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1456 translation = info ? CONTEXT_TT_DEV_IOTLB :
1457 CONTEXT_TT_MULTI_LEVEL;
1458 }
1459 /*
1460 * In pass through mode, AW must be programmed to indicate the largest
1461 * AGAW value supported by hardware. And ASR is ignored by hardware.
1462 */
1463 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1464 context_set_address_width(context, iommu->msagaw);
1465 else {
1466 context_set_address_root(context, virt_to_phys(pgd));
1467 context_set_address_width(context, iommu->agaw);
1468 }
1469
1470 context_set_translation_type(context, translation);
1405 context_set_fault_enable(context); 1471 context_set_fault_enable(context);
1406 context_set_present(context); 1472 context_set_present(context);
1407 domain_flush_cache(domain, context, sizeof(*context)); 1473 domain_flush_cache(domain, context, sizeof(*context));
1408 1474
1409 /* it's a non-present to present mapping */ 1475 /*
1410 if (iommu->flush.flush_context(iommu, domain->id, 1476 * It's a non-present to present mapping. If hardware doesn't cache
1411 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1477 * non-present entry we only need to flush the write-buffer. If the
1412 DMA_CCMD_DEVICE_INVL, 1)) 1478 * _does_ cache non-present entries, then it does so in the special
1479 * domain #0, which we have to flush:
1480 */
1481 if (cap_caching_mode(iommu->cap)) {
1482 iommu->flush.flush_context(iommu, 0,
1483 (((u16)bus) << 8) | devfn,
1484 DMA_CCMD_MASK_NOBIT,
1485 DMA_CCMD_DEVICE_INVL);
1486 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
1487 } else {
1413 iommu_flush_write_buffer(iommu); 1488 iommu_flush_write_buffer(iommu);
1414 else 1489 }
1415 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); 1490 iommu_enable_dev_iotlb(info);
1416
1417 spin_unlock_irqrestore(&iommu->lock, flags); 1491 spin_unlock_irqrestore(&iommu->lock, flags);
1418 1492
1419 spin_lock_irqsave(&domain->iommu_lock, flags); 1493 spin_lock_irqsave(&domain->iommu_lock, flags);
@@ -1426,13 +1500,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1426} 1500}
1427 1501
1428static int 1502static int
1429domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) 1503domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1504 int translation)
1430{ 1505{
1431 int ret; 1506 int ret;
1432 struct pci_dev *tmp, *parent; 1507 struct pci_dev *tmp, *parent;
1433 1508
1434 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus), 1509 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1435 pdev->bus->number, pdev->devfn); 1510 pdev->bus->number, pdev->devfn,
1511 translation);
1436 if (ret) 1512 if (ret)
1437 return ret; 1513 return ret;
1438 1514
@@ -1446,7 +1522,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1446 ret = domain_context_mapping_one(domain, 1522 ret = domain_context_mapping_one(domain,
1447 pci_domain_nr(parent->bus), 1523 pci_domain_nr(parent->bus),
1448 parent->bus->number, 1524 parent->bus->number,
1449 parent->devfn); 1525 parent->devfn, translation);
1450 if (ret) 1526 if (ret)
1451 return ret; 1527 return ret;
1452 parent = parent->bus->self; 1528 parent = parent->bus->self;
@@ -1454,12 +1530,14 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1454 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ 1530 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1455 return domain_context_mapping_one(domain, 1531 return domain_context_mapping_one(domain,
1456 pci_domain_nr(tmp->subordinate), 1532 pci_domain_nr(tmp->subordinate),
1457 tmp->subordinate->number, 0); 1533 tmp->subordinate->number, 0,
1534 translation);
1458 else /* this is a legacy PCI bridge */ 1535 else /* this is a legacy PCI bridge */
1459 return domain_context_mapping_one(domain, 1536 return domain_context_mapping_one(domain,
1460 pci_domain_nr(tmp->bus), 1537 pci_domain_nr(tmp->bus),
1461 tmp->bus->number, 1538 tmp->bus->number,
1462 tmp->devfn); 1539 tmp->devfn,
1540 translation);
1463} 1541}
1464 1542
1465static int domain_context_mapped(struct pci_dev *pdev) 1543static int domain_context_mapped(struct pci_dev *pdev)
@@ -1540,9 +1618,8 @@ static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1540 1618
1541 clear_context_table(iommu, bus, devfn); 1619 clear_context_table(iommu, bus, devfn);
1542 iommu->flush.flush_context(iommu, 0, 0, 0, 1620 iommu->flush.flush_context(iommu, 0, 0, 0,
1543 DMA_CCMD_GLOBAL_INVL, 0); 1621 DMA_CCMD_GLOBAL_INVL);
1544 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 1622 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1545 DMA_TLB_GLOBAL_FLUSH, 0);
1546} 1623}
1547 1624
1548static void domain_remove_dev_info(struct dmar_domain *domain) 1625static void domain_remove_dev_info(struct dmar_domain *domain)
@@ -1561,6 +1638,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
1561 info->dev->dev.archdata.iommu = NULL; 1638 info->dev->dev.archdata.iommu = NULL;
1562 spin_unlock_irqrestore(&device_domain_lock, flags); 1639 spin_unlock_irqrestore(&device_domain_lock, flags);
1563 1640
1641 iommu_disable_dev_iotlb(info);
1564 iommu = device_to_iommu(info->segment, info->bus, info->devfn); 1642 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1565 iommu_detach_dev(iommu, info->bus, info->devfn); 1643 iommu_detach_dev(iommu, info->bus, info->devfn);
1566 free_devinfo_mem(info); 1644 free_devinfo_mem(info);
@@ -1756,7 +1834,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
1756 goto error; 1834 goto error;
1757 1835
1758 /* context entry init */ 1836 /* context entry init */
1759 ret = domain_context_mapping(domain, pdev); 1837 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
1760 if (!ret) 1838 if (!ret)
1761 return 0; 1839 return 0;
1762error: 1840error:
@@ -1857,6 +1935,23 @@ static inline void iommu_prepare_isa(void)
1857} 1935}
1858#endif /* !CONFIG_DMAR_FLPY_WA */ 1936#endif /* !CONFIG_DMAR_FLPY_WA */
1859 1937
1938/* Initialize each context entry as pass through.*/
1939static int __init init_context_pass_through(void)
1940{
1941 struct pci_dev *pdev = NULL;
1942 struct dmar_domain *domain;
1943 int ret;
1944
1945 for_each_pci_dev(pdev) {
1946 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1947 ret = domain_context_mapping(domain, pdev,
1948 CONTEXT_TT_PASS_THROUGH);
1949 if (ret)
1950 return ret;
1951 }
1952 return 0;
1953}
1954
1860static int __init init_dmars(void) 1955static int __init init_dmars(void)
1861{ 1956{
1862 struct dmar_drhd_unit *drhd; 1957 struct dmar_drhd_unit *drhd;
@@ -1864,6 +1959,7 @@ static int __init init_dmars(void)
1864 struct pci_dev *pdev; 1959 struct pci_dev *pdev;
1865 struct intel_iommu *iommu; 1960 struct intel_iommu *iommu;
1866 int i, ret; 1961 int i, ret;
1962 int pass_through = 1;
1867 1963
1868 /* 1964 /*
1869 * for each drhd 1965 * for each drhd
@@ -1917,7 +2013,15 @@ static int __init init_dmars(void)
1917 printk(KERN_ERR "IOMMU: allocate root entry failed\n"); 2013 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1918 goto error; 2014 goto error;
1919 } 2015 }
2016 if (!ecap_pass_through(iommu->ecap))
2017 pass_through = 0;
1920 } 2018 }
2019 if (iommu_pass_through)
2020 if (!pass_through) {
2021 printk(KERN_INFO
2022 "Pass Through is not supported by hardware.\n");
2023 iommu_pass_through = 0;
2024 }
1921 2025
1922 /* 2026 /*
1923 * Start from the sane iommu hardware state. 2027 * Start from the sane iommu hardware state.
@@ -1973,35 +2077,56 @@ static int __init init_dmars(void)
1973 } 2077 }
1974 2078
1975 /* 2079 /*
1976 * For each rmrr 2080 * If pass through is set and enabled, context entries of all pci
1977 * for each dev attached to rmrr 2081 * devices are initialized by pass through translation type.
1978 * do
1979 * locate drhd for dev, alloc domain for dev
1980 * allocate free domain
1981 * allocate page table entries for rmrr
1982 * if context not allocated for bus
1983 * allocate and init context
1984 * set present in root table for this bus
1985 * init context with domain, translation etc
1986 * endfor
1987 * endfor
1988 */ 2082 */
1989 for_each_rmrr_units(rmrr) { 2083 if (iommu_pass_through) {
1990 for (i = 0; i < rmrr->devices_cnt; i++) { 2084 ret = init_context_pass_through();
1991 pdev = rmrr->devices[i]; 2085 if (ret) {
1992 /* some BIOS lists non-exist devices in DMAR table */ 2086 printk(KERN_ERR "IOMMU: Pass through init failed.\n");
1993 if (!pdev) 2087 iommu_pass_through = 0;
1994 continue;
1995 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1996 if (ret)
1997 printk(KERN_ERR
1998 "IOMMU: mapping reserved region failed\n");
1999 } 2088 }
2000 } 2089 }
2001 2090
2002 iommu_prepare_gfx_mapping(); 2091 /*
2092 * If pass through is not set or not enabled, setup context entries for
2093 * identity mappings for rmrr, gfx, and isa.
2094 */
2095 if (!iommu_pass_through) {
2096 /*
2097 * For each rmrr
2098 * for each dev attached to rmrr
2099 * do
2100 * locate drhd for dev, alloc domain for dev
2101 * allocate free domain
2102 * allocate page table entries for rmrr
2103 * if context not allocated for bus
2104 * allocate and init context
2105 * set present in root table for this bus
2106 * init context with domain, translation etc
2107 * endfor
2108 * endfor
2109 */
2110 for_each_rmrr_units(rmrr) {
2111 for (i = 0; i < rmrr->devices_cnt; i++) {
2112 pdev = rmrr->devices[i];
2113 /*
2114 * some BIOS lists non-exist devices in DMAR
2115 * table.
2116 */
2117 if (!pdev)
2118 continue;
2119 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2120 if (ret)
2121 printk(KERN_ERR
2122 "IOMMU: mapping reserved region failed\n");
2123 }
2124 }
2125
2126 iommu_prepare_gfx_mapping();
2003 2127
2004 iommu_prepare_isa(); 2128 iommu_prepare_isa();
2129 }
2005 2130
2006 /* 2131 /*
2007 * for each drhd 2132 * for each drhd
@@ -2023,10 +2148,8 @@ static int __init init_dmars(void)
2023 2148
2024 iommu_set_root_entry(iommu); 2149 iommu_set_root_entry(iommu);
2025 2150
2026 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, 2151 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2027 0); 2152 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2028 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2029 0);
2030 iommu_disable_protect_mem_regions(iommu); 2153 iommu_disable_protect_mem_regions(iommu);
2031 2154
2032 ret = iommu_enable_translation(iommu); 2155 ret = iommu_enable_translation(iommu);
@@ -2112,7 +2235,8 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
2112 2235
2113 /* make sure context mapping is ok */ 2236 /* make sure context mapping is ok */
2114 if (unlikely(!domain_context_mapped(pdev))) { 2237 if (unlikely(!domain_context_mapped(pdev))) {
2115 ret = domain_context_mapping(domain, pdev); 2238 ret = domain_context_mapping(domain, pdev,
2239 CONTEXT_TT_MULTI_LEVEL);
2116 if (ret) { 2240 if (ret) {
2117 printk(KERN_ERR 2241 printk(KERN_ERR
2118 "Domain context map for %s failed", 2242 "Domain context map for %s failed",
@@ -2173,10 +2297,11 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2173 if (ret) 2297 if (ret)
2174 goto error; 2298 goto error;
2175 2299
2176 /* it's a non-present to present mapping */ 2300 /* it's a non-present to present mapping. Only flush if caching mode */
2177 ret = iommu_flush_iotlb_psi(iommu, domain->id, 2301 if (cap_caching_mode(iommu->cap))
2178 start_paddr, size >> VTD_PAGE_SHIFT, 1); 2302 iommu_flush_iotlb_psi(iommu, 0, start_paddr,
2179 if (ret) 2303 size >> VTD_PAGE_SHIFT);
2304 else
2180 iommu_flush_write_buffer(iommu); 2305 iommu_flush_write_buffer(iommu);
2181 2306
2182 return start_paddr + ((u64)paddr & (~PAGE_MASK)); 2307 return start_paddr + ((u64)paddr & (~PAGE_MASK));
@@ -2210,15 +2335,22 @@ static void flush_unmaps(void)
2210 if (!iommu) 2335 if (!iommu)
2211 continue; 2336 continue;
2212 2337
2213 if (deferred_flush[i].next) { 2338 if (!deferred_flush[i].next)
2214 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2339 continue;
2215 DMA_TLB_GLOBAL_FLUSH, 0); 2340
2216 for (j = 0; j < deferred_flush[i].next; j++) { 2341 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2217 __free_iova(&deferred_flush[i].domain[j]->iovad, 2342 DMA_TLB_GLOBAL_FLUSH);
2218 deferred_flush[i].iova[j]); 2343 for (j = 0; j < deferred_flush[i].next; j++) {
2219 } 2344 unsigned long mask;
2220 deferred_flush[i].next = 0; 2345 struct iova *iova = deferred_flush[i].iova[j];
2346
2347 mask = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT;
2348 mask = ilog2(mask >> VTD_PAGE_SHIFT);
2349 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2350 iova->pfn_lo << PAGE_SHIFT, mask);
2351 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2221 } 2352 }
2353 deferred_flush[i].next = 0;
2222 } 2354 }
2223 2355
2224 list_size = 0; 2356 list_size = 0;
@@ -2291,9 +2423,8 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2291 /* free page tables */ 2423 /* free page tables */
2292 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2424 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2293 if (intel_iommu_strict) { 2425 if (intel_iommu_strict) {
2294 if (iommu_flush_iotlb_psi(iommu, 2426 iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2295 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) 2427 size >> VTD_PAGE_SHIFT);
2296 iommu_flush_write_buffer(iommu);
2297 /* free iova */ 2428 /* free iova */
2298 __free_iova(&domain->iovad, iova); 2429 __free_iova(&domain->iovad, iova);
2299 } else { 2430 } else {
@@ -2384,9 +2515,8 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2384 /* free page tables */ 2515 /* free page tables */
2385 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2516 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2386 2517
2387 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2518 iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2388 size >> VTD_PAGE_SHIFT, 0)) 2519 size >> VTD_PAGE_SHIFT);
2389 iommu_flush_write_buffer(iommu);
2390 2520
2391 /* free iova */ 2521 /* free iova */
2392 __free_iova(&domain->iovad, iova); 2522 __free_iova(&domain->iovad, iova);
@@ -2478,10 +2608,13 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2478 offset += size; 2608 offset += size;
2479 } 2609 }
2480 2610
2481 /* it's a non-present to present mapping */ 2611 /* it's a non-present to present mapping. Only flush if caching mode */
2482 if (iommu_flush_iotlb_psi(iommu, domain->id, 2612 if (cap_caching_mode(iommu->cap))
2483 start_addr, offset >> VTD_PAGE_SHIFT, 1)) 2613 iommu_flush_iotlb_psi(iommu, 0, start_addr,
2614 offset >> VTD_PAGE_SHIFT);
2615 else
2484 iommu_flush_write_buffer(iommu); 2616 iommu_flush_write_buffer(iommu);
2617
2485 return nelems; 2618 return nelems;
2486} 2619}
2487 2620
@@ -2640,9 +2773,9 @@ static int init_iommu_hw(void)
2640 iommu_set_root_entry(iommu); 2773 iommu_set_root_entry(iommu);
2641 2774
2642 iommu->flush.flush_context(iommu, 0, 0, 0, 2775 iommu->flush.flush_context(iommu, 0, 0, 0,
2643 DMA_CCMD_GLOBAL_INVL, 0); 2776 DMA_CCMD_GLOBAL_INVL);
2644 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2777 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2645 DMA_TLB_GLOBAL_FLUSH, 0); 2778 DMA_TLB_GLOBAL_FLUSH);
2646 iommu_disable_protect_mem_regions(iommu); 2779 iommu_disable_protect_mem_regions(iommu);
2647 iommu_enable_translation(iommu); 2780 iommu_enable_translation(iommu);
2648 } 2781 }
@@ -2657,9 +2790,9 @@ static void iommu_flush_all(void)
2657 2790
2658 for_each_active_iommu(iommu, drhd) { 2791 for_each_active_iommu(iommu, drhd) {
2659 iommu->flush.flush_context(iommu, 0, 0, 0, 2792 iommu->flush.flush_context(iommu, 0, 0, 0,
2660 DMA_CCMD_GLOBAL_INVL, 0); 2793 DMA_CCMD_GLOBAL_INVL);
2661 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2794 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2662 DMA_TLB_GLOBAL_FLUSH, 0); 2795 DMA_TLB_GLOBAL_FLUSH);
2663 } 2796 }
2664} 2797}
2665 2798
@@ -2782,7 +2915,7 @@ int __init intel_iommu_init(void)
2782 * Check the need for DMA-remapping initialization now. 2915 * Check the need for DMA-remapping initialization now.
2783 * Above initialization will also be used by Interrupt-remapping. 2916 * Above initialization will also be used by Interrupt-remapping.
2784 */ 2917 */
2785 if (no_iommu || swiotlb || dmar_disabled) 2918 if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled)
2786 return -ENODEV; 2919 return -ENODEV;
2787 2920
2788 iommu_init_mempool(); 2921 iommu_init_mempool();
@@ -2802,7 +2935,15 @@ int __init intel_iommu_init(void)
2802 2935
2803 init_timer(&unmap_timer); 2936 init_timer(&unmap_timer);
2804 force_iommu = 1; 2937 force_iommu = 1;
2805 dma_ops = &intel_dma_ops; 2938
2939 if (!iommu_pass_through) {
2940 printk(KERN_INFO
2941 "Multi-level page-table translation for DMAR.\n");
2942 dma_ops = &intel_dma_ops;
2943 } else
2944 printk(KERN_INFO
2945 "DMAR: Pass through translation for DMAR.\n");
2946
2806 init_iommu_sysfs(); 2947 init_iommu_sysfs();
2807 2948
2808 register_iommu(&intel_iommu_ops); 2949 register_iommu(&intel_iommu_ops);
@@ -2888,6 +3029,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2888 info->dev->dev.archdata.iommu = NULL; 3029 info->dev->dev.archdata.iommu = NULL;
2889 spin_unlock_irqrestore(&device_domain_lock, flags); 3030 spin_unlock_irqrestore(&device_domain_lock, flags);
2890 3031
3032 iommu_disable_dev_iotlb(info);
2891 iommu_detach_dev(iommu, info->bus, info->devfn); 3033 iommu_detach_dev(iommu, info->bus, info->devfn);
2892 iommu_detach_dependent_devices(iommu, pdev); 3034 iommu_detach_dependent_devices(iommu, pdev);
2893 free_devinfo_mem(info); 3035 free_devinfo_mem(info);
@@ -2938,6 +3080,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2938 3080
2939 spin_unlock_irqrestore(&device_domain_lock, flags1); 3081 spin_unlock_irqrestore(&device_domain_lock, flags1);
2940 3082
3083 iommu_disable_dev_iotlb(info);
2941 iommu = device_to_iommu(info->segment, info->bus, info->devfn); 3084 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
2942 iommu_detach_dev(iommu, info->bus, info->devfn); 3085 iommu_detach_dev(iommu, info->bus, info->devfn);
2943 iommu_detach_dependent_devices(iommu, info->dev); 3086 iommu_detach_dependent_devices(iommu, info->dev);
@@ -3142,11 +3285,11 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
3142 return -EFAULT; 3285 return -EFAULT;
3143 } 3286 }
3144 3287
3145 ret = domain_context_mapping(dmar_domain, pdev); 3288 ret = vm_domain_add_dev_info(dmar_domain, pdev);
3146 if (ret) 3289 if (ret)
3147 return ret; 3290 return ret;
3148 3291
3149 ret = vm_domain_add_dev_info(dmar_domain, pdev); 3292 ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3150 return ret; 3293 return ret;
3151} 3294}
3152 3295