path: root/drivers/pci/intel-iommu.c
author		Dan Williams <dan.j.williams@intel.com>	2009-09-08 20:55:21 -0400
committer	Dan Williams <dan.j.williams@intel.com>	2009-09-08 20:55:21 -0400
commit		bbb20089a3275a19e475dbc21320c3742e3ca423 (patch)
tree		216fdc1cbef450ca688135c5b8969169482d9a48 /drivers/pci/intel-iommu.c
parent		3e48e656903e9fd8bc805c6a2c4264d7808d315b (diff)
parent		657a77fa7284d8ae28dfa48f1dc5d919bf5b2843 (diff)
Merge branch 'dmaengine' into async-tx-next
Conflicts:
	crypto/async_tx/async_xor.c
	drivers/dma/ioat/dma_v2.h
	drivers/dma/ioat/pci.c
	drivers/md/raid5.c
Diffstat (limited to 'drivers/pci/intel-iommu.c')
-rw-r--r--	drivers/pci/intel-iommu.c	764
1 file changed, 546 insertions(+), 218 deletions(-)
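
The hunks below introduce iommu_no_mapping(), which decides per device whether DMA stays on the static 1:1 (identity) mapping in si_domain or falls back to remapped DMA through the page tables. The following standalone C sketch only models that decision under simplified assumptions: toy_pci_dev, si_domain_add() and si_domain_remove() are stand-ins for the kernel's struct pci_dev, domain_add_dev_info() and domain_remove_one_dev_info(), not the real API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Matches the kernel macro's behaviour, including the n == 64 case. */
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

/* Simplified stand-in for struct pci_dev: only what the decision needs. */
struct toy_pci_dev {
	const char *name;
	uint64_t dma_mask;
	bool in_si_domain;	/* models membership in si_domain->devices */
};

/* Hypothetical helpers standing in for domain_add_dev_info()/domain_remove_one_dev_info(). */
static void si_domain_add(struct toy_pci_dev *dev)    { dev->in_si_domain = true; }
static void si_domain_remove(struct toy_pci_dev *dev) { dev->in_si_domain = false; }

/*
 * Models iommu_no_mapping() from the hunks below: returns true when the
 * device keeps (or newly gains) the 1:1 identity mapping and so needs no
 * IOVA allocation, false when it must use remapped DMA.
 */
static bool toy_iommu_no_mapping(struct toy_pci_dev *dev, bool identity_mapping_enabled)
{
	if (!identity_mapping_enabled)
		return false;	/* stands in for the iommu_dummy() check */

	if (dev->in_si_domain) {
		if (dev->dma_mask > DMA_BIT_MASK(32))
			return true;	/* 64-bit capable: stay identity mapped */
		/* 32-bit only device: drop it from si_domain, use remapping */
		si_domain_remove(dev);
		printf("32bit %s uses non-identity mapping\n", dev->name);
		return false;
	}

	/* Not in si_domain: a 64-bit capable device is promoted to identity mapping. */
	if (dev->dma_mask > DMA_BIT_MASK(32)) {
		si_domain_add(dev);
		printf("64bit %s uses identity mapping\n", dev->name);
		return true;
	}
	return false;
}

int main(void)
{
	struct toy_pci_dev nic     = { "0000:01:00.0", DMA_BIT_MASK(64), true };
	struct toy_pci_dev old_dev = { "0000:02:00.0", DMA_BIT_MASK(32), true };

	printf("nic bypasses remapping: %d\n", toy_iommu_no_mapping(&nic, true));
	printf("old bypasses remapping: %d\n", toy_iommu_no_mapping(&old_dev, true));
	return 0;
}
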
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index a563fbe559d0..e53eacd75c8d 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -39,6 +39,7 @@
39#include <linux/sysdev.h> 39#include <linux/sysdev.h>
40#include <asm/cacheflush.h> 40#include <asm/cacheflush.h>
41#include <asm/iommu.h> 41#include <asm/iommu.h>
42#include <asm/e820.h>
42#include "pci.h" 43#include "pci.h"
43 44
44#define ROOT_SIZE VTD_PAGE_SIZE 45#define ROOT_SIZE VTD_PAGE_SIZE
@@ -53,6 +54,8 @@
53 54
54#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 55#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
55 56
57#define MAX_AGAW_WIDTH 64
58
56#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 59#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
57 60
58#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 61#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
@@ -131,8 +134,6 @@ static inline void context_set_fault_enable(struct context_entry *context)
131 context->lo &= (((u64)-1) << 2) | 1; 134 context->lo &= (((u64)-1) << 2) | 1;
132} 135}
133 136
134#define CONTEXT_TT_MULTI_LEVEL 0
135
136static inline void context_set_translation_type(struct context_entry *context, 137static inline void context_set_translation_type(struct context_entry *context,
137 unsigned long value) 138 unsigned long value)
138{ 139{
@@ -217,6 +218,14 @@ static inline bool dma_pte_present(struct dma_pte *pte)
217 return (pte->val & 3) != 0; 218 return (pte->val & 3) != 0;
218} 219}
219 220
221/*
222 * This domain is a static identity mapping domain.
223 * 1. This domain creates a static 1:1 mapping to all usable memory.
224 * 2. It maps to each iommu if successful.
225 * 3. Each iommu maps to this domain if successful.
226 */
227struct dmar_domain *si_domain;
228
220/* devices under the same p2p bridge are owned in one domain */ 229/* devices under the same p2p bridge are owned in one domain */
221#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0) 230#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
222 231
@@ -225,6 +234,9 @@ static inline bool dma_pte_present(struct dma_pte *pte)
225 */ 234 */
226#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1) 235#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
227 236
237/* si_domain contains multiple devices */
238#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
239
228struct dmar_domain { 240struct dmar_domain {
229 int id; /* domain id */ 241 int id; /* domain id */
230 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/ 242 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
@@ -256,6 +268,7 @@ struct device_domain_info {
256 u8 bus; /* PCI bus number */ 268 u8 bus; /* PCI bus number */
257 u8 devfn; /* PCI devfn number */ 269 u8 devfn; /* PCI devfn number */
258 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ 270 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
271 struct intel_iommu *iommu; /* IOMMU used by this device */
259 struct dmar_domain *domain; /* pointer to domain */ 272 struct dmar_domain *domain; /* pointer to domain */
260}; 273};
261 274
@@ -401,17 +414,13 @@ void free_iova_mem(struct iova *iova)
401 414
402static inline int width_to_agaw(int width); 415static inline int width_to_agaw(int width);
403 416
404/* calculate agaw for each iommu. 417static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
405 * "SAGAW" may be different across iommus, use a default agaw, and
406 * get a supported less agaw for iommus that don't support the default agaw.
407 */
408int iommu_calculate_agaw(struct intel_iommu *iommu)
409{ 418{
410 unsigned long sagaw; 419 unsigned long sagaw;
411 int agaw = -1; 420 int agaw = -1;
412 421
413 sagaw = cap_sagaw(iommu->cap); 422 sagaw = cap_sagaw(iommu->cap);
414 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); 423 for (agaw = width_to_agaw(max_gaw);
415 agaw >= 0; agaw--) { 424 agaw >= 0; agaw--) {
416 if (test_bit(agaw, &sagaw)) 425 if (test_bit(agaw, &sagaw))
417 break; 426 break;
@@ -420,12 +429,32 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
420 return agaw; 429 return agaw;
421} 430}
422 431
423/* in native case, each domain is related to only one iommu */ 432/*
433 * Calculate max SAGAW for each iommu.
434 */
435int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
436{
437 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
438}
439
440/*
441 * calculate agaw for each iommu.
442 * "SAGAW" may be different across iommus, use a default agaw, and
443 * get a supported less agaw for iommus that don't support the default agaw.
444 */
445int iommu_calculate_agaw(struct intel_iommu *iommu)
446{
447 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
448}
449
450/* This function only returns a single iommu in a domain */
424static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) 451static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
425{ 452{
426 int iommu_id; 453 int iommu_id;
427 454
455 /* si_domain and vm domain should not get here. */
428 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE); 456 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
457 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
429 458
430 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); 459 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
431 if (iommu_id < 0 || iommu_id >= g_num_of_iommus) 460 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
@@ -809,7 +838,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
809static void iommu_set_root_entry(struct intel_iommu *iommu) 838static void iommu_set_root_entry(struct intel_iommu *iommu)
810{ 839{
811 void *addr; 840 void *addr;
812 u32 cmd, sts; 841 u32 sts;
813 unsigned long flag; 842 unsigned long flag;
814 843
815 addr = iommu->root_entry; 844 addr = iommu->root_entry;
@@ -817,12 +846,11 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
817 spin_lock_irqsave(&iommu->register_lock, flag); 846 spin_lock_irqsave(&iommu->register_lock, flag);
818 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr)); 847 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
819 848
820 cmd = iommu->gcmd | DMA_GCMD_SRTP; 849 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
821 writel(cmd, iommu->reg + DMAR_GCMD_REG);
822 850
823 /* Make sure hardware complete it */ 851 /* Make sure hardware complete it */
824 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 852 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
825 readl, (sts & DMA_GSTS_RTPS), sts); 853 readl, (sts & DMA_GSTS_RTPS), sts);
826 854
827 spin_unlock_irqrestore(&iommu->register_lock, flag); 855 spin_unlock_irqrestore(&iommu->register_lock, flag);
828} 856}
@@ -834,39 +862,25 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu)
834 862
835 if (!rwbf_quirk && !cap_rwbf(iommu->cap)) 863 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
836 return; 864 return;
837 val = iommu->gcmd | DMA_GCMD_WBF;
838 865
839 spin_lock_irqsave(&iommu->register_lock, flag); 866 spin_lock_irqsave(&iommu->register_lock, flag);
840 writel(val, iommu->reg + DMAR_GCMD_REG); 867 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
841 868
842 /* Make sure hardware complete it */ 869 /* Make sure hardware complete it */
843 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 870 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
844 readl, (!(val & DMA_GSTS_WBFS)), val); 871 readl, (!(val & DMA_GSTS_WBFS)), val);
845 872
846 spin_unlock_irqrestore(&iommu->register_lock, flag); 873 spin_unlock_irqrestore(&iommu->register_lock, flag);
847} 874}
848 875
849/* return value determine if we need a write buffer flush */ 876/* return value determine if we need a write buffer flush */
850static int __iommu_flush_context(struct intel_iommu *iommu, 877static void __iommu_flush_context(struct intel_iommu *iommu,
851 u16 did, u16 source_id, u8 function_mask, u64 type, 878 u16 did, u16 source_id, u8 function_mask,
852 int non_present_entry_flush) 879 u64 type)
853{ 880{
854 u64 val = 0; 881 u64 val = 0;
855 unsigned long flag; 882 unsigned long flag;
856 883
857 /*
858 * In the non-present entry flush case, if hardware doesn't cache
859 * non-present entry we do nothing and if hardware cache non-present
860 * entry, we flush entries of domain 0 (the domain id is used to cache
861 * any non-present entries)
862 */
863 if (non_present_entry_flush) {
864 if (!cap_caching_mode(iommu->cap))
865 return 1;
866 else
867 did = 0;
868 }
869
870 switch (type) { 884 switch (type) {
871 case DMA_CCMD_GLOBAL_INVL: 885 case DMA_CCMD_GLOBAL_INVL:
872 val = DMA_CCMD_GLOBAL_INVL; 886 val = DMA_CCMD_GLOBAL_INVL;
@@ -891,33 +905,16 @@ static int __iommu_flush_context(struct intel_iommu *iommu,
891 dmar_readq, (!(val & DMA_CCMD_ICC)), val); 905 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
892 906
893 spin_unlock_irqrestore(&iommu->register_lock, flag); 907 spin_unlock_irqrestore(&iommu->register_lock, flag);
894
895 /* flush context entry will implicitly flush write buffer */
896 return 0;
897} 908}
898 909
899/* return value determine if we need a write buffer flush */ 910/* return value determine if we need a write buffer flush */
900static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, 911static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
901 u64 addr, unsigned int size_order, u64 type, 912 u64 addr, unsigned int size_order, u64 type)
902 int non_present_entry_flush)
903{ 913{
904 int tlb_offset = ecap_iotlb_offset(iommu->ecap); 914 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
905 u64 val = 0, val_iva = 0; 915 u64 val = 0, val_iva = 0;
906 unsigned long flag; 916 unsigned long flag;
907 917
908 /*
909 * In the non-present entry flush case, if hardware doesn't cache
910 * non-present entry we do nothing and if hardware cache non-present
911 * entry, we flush entries of domain 0 (the domain id is used to cache
912 * any non-present entries)
913 */
914 if (non_present_entry_flush) {
915 if (!cap_caching_mode(iommu->cap))
916 return 1;
917 else
918 did = 0;
919 }
920
921 switch (type) { 918 switch (type) {
922 case DMA_TLB_GLOBAL_FLUSH: 919 case DMA_TLB_GLOBAL_FLUSH:
923 /* global flush doesn't need set IVA_REG */ 920 /* global flush doesn't need set IVA_REG */
@@ -965,37 +962,101 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
965 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", 962 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
966 (unsigned long long)DMA_TLB_IIRG(type), 963 (unsigned long long)DMA_TLB_IIRG(type),
967 (unsigned long long)DMA_TLB_IAIG(val)); 964 (unsigned long long)DMA_TLB_IAIG(val));
968 /* flush iotlb entry will implicitly flush write buffer */
969 return 0;
970} 965}
971 966
972static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, 967static struct device_domain_info *iommu_support_dev_iotlb(
973 u64 addr, unsigned int pages, int non_present_entry_flush) 968 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
969{
970 int found = 0;
971 unsigned long flags;
972 struct device_domain_info *info;
973 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
974
975 if (!ecap_dev_iotlb_support(iommu->ecap))
976 return NULL;
977
978 if (!iommu->qi)
979 return NULL;
980
981 spin_lock_irqsave(&device_domain_lock, flags);
982 list_for_each_entry(info, &domain->devices, link)
983 if (info->bus == bus && info->devfn == devfn) {
984 found = 1;
985 break;
986 }
987 spin_unlock_irqrestore(&device_domain_lock, flags);
988
989 if (!found || !info->dev)
990 return NULL;
991
992 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
993 return NULL;
994
995 if (!dmar_find_matched_atsr_unit(info->dev))
996 return NULL;
997
998 info->iommu = iommu;
999
1000 return info;
1001}
1002
1003static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1004{
1005 if (!info)
1006 return;
1007
1008 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1009}
1010
1011static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1012{
1013 if (!info->dev || !pci_ats_enabled(info->dev))
1014 return;
1015
1016 pci_disable_ats(info->dev);
1017}
1018
1019static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1020 u64 addr, unsigned mask)
974{ 1021{
975 unsigned int mask; 1022 u16 sid, qdep;
1023 unsigned long flags;
1024 struct device_domain_info *info;
1025
1026 spin_lock_irqsave(&device_domain_lock, flags);
1027 list_for_each_entry(info, &domain->devices, link) {
1028 if (!info->dev || !pci_ats_enabled(info->dev))
1029 continue;
1030
1031 sid = info->bus << 8 | info->devfn;
1032 qdep = pci_ats_queue_depth(info->dev);
1033 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1034 }
1035 spin_unlock_irqrestore(&device_domain_lock, flags);
1036}
1037
1038static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1039 u64 addr, unsigned int pages)
1040{
1041 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
976 1042
977 BUG_ON(addr & (~VTD_PAGE_MASK)); 1043 BUG_ON(addr & (~VTD_PAGE_MASK));
978 BUG_ON(pages == 0); 1044 BUG_ON(pages == 0);
979 1045
980 /* Fallback to domain selective flush if no PSI support */
981 if (!cap_pgsel_inv(iommu->cap))
982 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
983 DMA_TLB_DSI_FLUSH,
984 non_present_entry_flush);
985
986 /* 1046 /*
1047 * Fallback to domain selective flush if no PSI support or the size is
1048 * too big.
987 * PSI requires page size to be 2 ^ x, and the base address is naturally 1049 * PSI requires page size to be 2 ^ x, and the base address is naturally
988 * aligned to the size 1050 * aligned to the size
989 */ 1051 */
990 mask = ilog2(__roundup_pow_of_two(pages)); 1052 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
991 /* Fallback to domain selective flush if size is too big */ 1053 iommu->flush.flush_iotlb(iommu, did, 0, 0,
992 if (mask > cap_max_amask_val(iommu->cap)) 1054 DMA_TLB_DSI_FLUSH);
993 return iommu->flush.flush_iotlb(iommu, did, 0, 0, 1055 else
994 DMA_TLB_DSI_FLUSH, non_present_entry_flush); 1056 iommu->flush.flush_iotlb(iommu, did, addr, mask,
995 1057 DMA_TLB_PSI_FLUSH);
996 return iommu->flush.flush_iotlb(iommu, did, addr, mask, 1058 if (did)
997 DMA_TLB_PSI_FLUSH, 1059 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
998 non_present_entry_flush);
999} 1060}
1000 1061
1001static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) 1062static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
@@ -1021,13 +1082,13 @@ static int iommu_enable_translation(struct intel_iommu *iommu)
1021 unsigned long flags; 1082 unsigned long flags;
1022 1083
1023 spin_lock_irqsave(&iommu->register_lock, flags); 1084 spin_lock_irqsave(&iommu->register_lock, flags);
1024 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG); 1085 iommu->gcmd |= DMA_GCMD_TE;
1086 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1025 1087
1026 /* Make sure hardware complete it */ 1088 /* Make sure hardware complete it */
1027 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1089 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1028 readl, (sts & DMA_GSTS_TES), sts); 1090 readl, (sts & DMA_GSTS_TES), sts);
1029 1091
1030 iommu->gcmd |= DMA_GCMD_TE;
1031 spin_unlock_irqrestore(&iommu->register_lock, flags); 1092 spin_unlock_irqrestore(&iommu->register_lock, flags);
1032 return 0; 1093 return 0;
1033} 1094}
@@ -1043,7 +1104,7 @@ static int iommu_disable_translation(struct intel_iommu *iommu)
1043 1104
1044 /* Make sure hardware complete it */ 1105 /* Make sure hardware complete it */
1045 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1106 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1046 readl, (!(sts & DMA_GSTS_TES)), sts); 1107 readl, (!(sts & DMA_GSTS_TES)), sts);
1047 1108
1048 spin_unlock_irqrestore(&iommu->register_lock, flag); 1109 spin_unlock_irqrestore(&iommu->register_lock, flag);
1049 return 0; 1110 return 0;
@@ -1142,48 +1203,71 @@ void free_dmar_iommu(struct intel_iommu *iommu)
1142 free_context_table(iommu); 1203 free_context_table(iommu);
1143} 1204}
1144 1205
1145static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) 1206static struct dmar_domain *alloc_domain(void)
1146{ 1207{
1147 unsigned long num;
1148 unsigned long ndomains;
1149 struct dmar_domain *domain; 1208 struct dmar_domain *domain;
1150 unsigned long flags;
1151 1209
1152 domain = alloc_domain_mem(); 1210 domain = alloc_domain_mem();
1153 if (!domain) 1211 if (!domain)
1154 return NULL; 1212 return NULL;
1155 1213
1214 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1215 domain->flags = 0;
1216
1217 return domain;
1218}
1219
1220static int iommu_attach_domain(struct dmar_domain *domain,
1221 struct intel_iommu *iommu)
1222{
1223 int num;
1224 unsigned long ndomains;
1225 unsigned long flags;
1226
1156 ndomains = cap_ndoms(iommu->cap); 1227 ndomains = cap_ndoms(iommu->cap);
1157 1228
1158 spin_lock_irqsave(&iommu->lock, flags); 1229 spin_lock_irqsave(&iommu->lock, flags);
1230
1159 num = find_first_zero_bit(iommu->domain_ids, ndomains); 1231 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1160 if (num >= ndomains) { 1232 if (num >= ndomains) {
1161 spin_unlock_irqrestore(&iommu->lock, flags); 1233 spin_unlock_irqrestore(&iommu->lock, flags);
1162 free_domain_mem(domain);
1163 printk(KERN_ERR "IOMMU: no free domain ids\n"); 1234 printk(KERN_ERR "IOMMU: no free domain ids\n");
1164 return NULL; 1235 return -ENOMEM;
1165 } 1236 }
1166 1237
1167 set_bit(num, iommu->domain_ids);
1168 domain->id = num; 1238 domain->id = num;
1169 memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); 1239 set_bit(num, iommu->domain_ids);
1170 set_bit(iommu->seq_id, &domain->iommu_bmp); 1240 set_bit(iommu->seq_id, &domain->iommu_bmp);
1171 domain->flags = 0;
1172 iommu->domains[num] = domain; 1241 iommu->domains[num] = domain;
1173 spin_unlock_irqrestore(&iommu->lock, flags); 1242 spin_unlock_irqrestore(&iommu->lock, flags);
1174 1243
1175 return domain; 1244 return 0;
1176} 1245}
1177 1246
1178static void iommu_free_domain(struct dmar_domain *domain) 1247static void iommu_detach_domain(struct dmar_domain *domain,
1248 struct intel_iommu *iommu)
1179{ 1249{
1180 unsigned long flags; 1250 unsigned long flags;
1181 struct intel_iommu *iommu; 1251 int num, ndomains;
1182 1252 int found = 0;
1183 iommu = domain_get_iommu(domain);
1184 1253
1185 spin_lock_irqsave(&iommu->lock, flags); 1254 spin_lock_irqsave(&iommu->lock, flags);
1186 clear_bit(domain->id, iommu->domain_ids); 1255 ndomains = cap_ndoms(iommu->cap);
1256 num = find_first_bit(iommu->domain_ids, ndomains);
1257 for (; num < ndomains; ) {
1258 if (iommu->domains[num] == domain) {
1259 found = 1;
1260 break;
1261 }
1262 num = find_next_bit(iommu->domain_ids,
1263 cap_ndoms(iommu->cap), num+1);
1264 }
1265
1266 if (found) {
1267 clear_bit(num, iommu->domain_ids);
1268 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1269 iommu->domains[num] = NULL;
1270 }
1187 spin_unlock_irqrestore(&iommu->lock, flags); 1271 spin_unlock_irqrestore(&iommu->lock, flags);
1188} 1272}
1189 1273
@@ -1303,6 +1387,8 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1303 1387
1304static void domain_exit(struct dmar_domain *domain) 1388static void domain_exit(struct dmar_domain *domain)
1305{ 1389{
1390 struct dmar_drhd_unit *drhd;
1391 struct intel_iommu *iommu;
1306 u64 end; 1392 u64 end;
1307 1393
1308 /* Domain 0 is reserved, so dont process it */ 1394 /* Domain 0 is reserved, so dont process it */
@@ -1321,12 +1407,15 @@ static void domain_exit(struct dmar_domain *domain)
1321 /* free page tables */ 1407 /* free page tables */
1322 dma_pte_free_pagetable(domain, 0, end); 1408 dma_pte_free_pagetable(domain, 0, end);
1323 1409
1324 iommu_free_domain(domain); 1410 for_each_active_iommu(iommu, drhd)
1411 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1412 iommu_detach_domain(domain, iommu);
1413
1325 free_domain_mem(domain); 1414 free_domain_mem(domain);
1326} 1415}
1327 1416
1328static int domain_context_mapping_one(struct dmar_domain *domain, 1417static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1329 int segment, u8 bus, u8 devfn) 1418 u8 bus, u8 devfn, int translation)
1330{ 1419{
1331 struct context_entry *context; 1420 struct context_entry *context;
1332 unsigned long flags; 1421 unsigned long flags;
@@ -1336,10 +1425,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1336 unsigned long ndomains; 1425 unsigned long ndomains;
1337 int id; 1426 int id;
1338 int agaw; 1427 int agaw;
1428 struct device_domain_info *info = NULL;
1339 1429
1340 pr_debug("Set context mapping for %02x:%02x.%d\n", 1430 pr_debug("Set context mapping for %02x:%02x.%d\n",
1341 bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1431 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1432
1342 BUG_ON(!domain->pgd); 1433 BUG_ON(!domain->pgd);
1434 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1435 translation != CONTEXT_TT_MULTI_LEVEL);
1343 1436
1344 iommu = device_to_iommu(segment, bus, devfn); 1437 iommu = device_to_iommu(segment, bus, devfn);
1345 if (!iommu) 1438 if (!iommu)
@@ -1357,7 +1450,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1357 id = domain->id; 1450 id = domain->id;
1358 pgd = domain->pgd; 1451 pgd = domain->pgd;
1359 1452
1360 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { 1453 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1454 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1361 int found = 0; 1455 int found = 0;
1362 1456
1363 /* find an available domain id for this device in iommu */ 1457 /* find an available domain id for this device in iommu */
@@ -1382,6 +1476,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1382 } 1476 }
1383 1477
1384 set_bit(num, iommu->domain_ids); 1478 set_bit(num, iommu->domain_ids);
1479 set_bit(iommu->seq_id, &domain->iommu_bmp);
1385 iommu->domains[num] = domain; 1480 iommu->domains[num] = domain;
1386 id = num; 1481 id = num;
1387 } 1482 }
@@ -1399,21 +1494,44 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1399 } 1494 }
1400 1495
1401 context_set_domain_id(context, id); 1496 context_set_domain_id(context, id);
1402 context_set_address_width(context, iommu->agaw); 1497
1403 context_set_address_root(context, virt_to_phys(pgd)); 1498 if (translation != CONTEXT_TT_PASS_THROUGH) {
1404 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL); 1499 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1500 translation = info ? CONTEXT_TT_DEV_IOTLB :
1501 CONTEXT_TT_MULTI_LEVEL;
1502 }
1503 /*
1504 * In pass through mode, AW must be programmed to indicate the largest
1505 * AGAW value supported by hardware. And ASR is ignored by hardware.
1506 */
1507 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1508 context_set_address_width(context, iommu->msagaw);
1509 else {
1510 context_set_address_root(context, virt_to_phys(pgd));
1511 context_set_address_width(context, iommu->agaw);
1512 }
1513
1514 context_set_translation_type(context, translation);
1405 context_set_fault_enable(context); 1515 context_set_fault_enable(context);
1406 context_set_present(context); 1516 context_set_present(context);
1407 domain_flush_cache(domain, context, sizeof(*context)); 1517 domain_flush_cache(domain, context, sizeof(*context));
1408 1518
1409 /* it's a non-present to present mapping */ 1519 /*
1410 if (iommu->flush.flush_context(iommu, domain->id, 1520 * It's a non-present to present mapping. If hardware doesn't cache
1411 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1521 * non-present entry we only need to flush the write-buffer. If the
1412 DMA_CCMD_DEVICE_INVL, 1)) 1522 * _does_ cache non-present entries, then it does so in the special
1523 * domain #0, which we have to flush:
1524 */
1525 if (cap_caching_mode(iommu->cap)) {
1526 iommu->flush.flush_context(iommu, 0,
1527 (((u16)bus) << 8) | devfn,
1528 DMA_CCMD_MASK_NOBIT,
1529 DMA_CCMD_DEVICE_INVL);
1530 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
1531 } else {
1413 iommu_flush_write_buffer(iommu); 1532 iommu_flush_write_buffer(iommu);
1414 else 1533 }
1415 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); 1534 iommu_enable_dev_iotlb(info);
1416
1417 spin_unlock_irqrestore(&iommu->lock, flags); 1535 spin_unlock_irqrestore(&iommu->lock, flags);
1418 1536
1419 spin_lock_irqsave(&domain->iommu_lock, flags); 1537 spin_lock_irqsave(&domain->iommu_lock, flags);
@@ -1426,13 +1544,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1426} 1544}
1427 1545
1428static int 1546static int
1429domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) 1547domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1548 int translation)
1430{ 1549{
1431 int ret; 1550 int ret;
1432 struct pci_dev *tmp, *parent; 1551 struct pci_dev *tmp, *parent;
1433 1552
1434 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus), 1553 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1435 pdev->bus->number, pdev->devfn); 1554 pdev->bus->number, pdev->devfn,
1555 translation);
1436 if (ret) 1556 if (ret)
1437 return ret; 1557 return ret;
1438 1558
@@ -1446,7 +1566,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1446 ret = domain_context_mapping_one(domain, 1566 ret = domain_context_mapping_one(domain,
1447 pci_domain_nr(parent->bus), 1567 pci_domain_nr(parent->bus),
1448 parent->bus->number, 1568 parent->bus->number,
1449 parent->devfn); 1569 parent->devfn, translation);
1450 if (ret) 1570 if (ret)
1451 return ret; 1571 return ret;
1452 parent = parent->bus->self; 1572 parent = parent->bus->self;
@@ -1454,12 +1574,14 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1454 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ 1574 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1455 return domain_context_mapping_one(domain, 1575 return domain_context_mapping_one(domain,
1456 pci_domain_nr(tmp->subordinate), 1576 pci_domain_nr(tmp->subordinate),
1457 tmp->subordinate->number, 0); 1577 tmp->subordinate->number, 0,
1578 translation);
1458 else /* this is a legacy PCI bridge */ 1579 else /* this is a legacy PCI bridge */
1459 return domain_context_mapping_one(domain, 1580 return domain_context_mapping_one(domain,
1460 pci_domain_nr(tmp->bus), 1581 pci_domain_nr(tmp->bus),
1461 tmp->bus->number, 1582 tmp->bus->number,
1462 tmp->devfn); 1583 tmp->devfn,
1584 translation);
1463} 1585}
1464 1586
1465static int domain_context_mapped(struct pci_dev *pdev) 1587static int domain_context_mapped(struct pci_dev *pdev)
@@ -1540,9 +1662,8 @@ static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1540 1662
1541 clear_context_table(iommu, bus, devfn); 1663 clear_context_table(iommu, bus, devfn);
1542 iommu->flush.flush_context(iommu, 0, 0, 0, 1664 iommu->flush.flush_context(iommu, 0, 0, 0,
1543 DMA_CCMD_GLOBAL_INVL, 0); 1665 DMA_CCMD_GLOBAL_INVL);
1544 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 1666 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1545 DMA_TLB_GLOBAL_FLUSH, 0);
1546} 1667}
1547 1668
1548static void domain_remove_dev_info(struct dmar_domain *domain) 1669static void domain_remove_dev_info(struct dmar_domain *domain)
@@ -1561,6 +1682,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
1561 info->dev->dev.archdata.iommu = NULL; 1682 info->dev->dev.archdata.iommu = NULL;
1562 spin_unlock_irqrestore(&device_domain_lock, flags); 1683 spin_unlock_irqrestore(&device_domain_lock, flags);
1563 1684
1685 iommu_disable_dev_iotlb(info);
1564 iommu = device_to_iommu(info->segment, info->bus, info->devfn); 1686 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1565 iommu_detach_dev(iommu, info->bus, info->devfn); 1687 iommu_detach_dev(iommu, info->bus, info->devfn);
1566 free_devinfo_mem(info); 1688 free_devinfo_mem(info);
@@ -1597,6 +1719,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1597 unsigned long flags; 1719 unsigned long flags;
1598 int bus = 0, devfn = 0; 1720 int bus = 0, devfn = 0;
1599 int segment; 1721 int segment;
1722 int ret;
1600 1723
1601 domain = find_domain(pdev); 1724 domain = find_domain(pdev);
1602 if (domain) 1725 if (domain)
@@ -1629,6 +1752,10 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1629 } 1752 }
1630 } 1753 }
1631 1754
1755 domain = alloc_domain();
1756 if (!domain)
1757 goto error;
1758
1632 /* Allocate new domain for the device */ 1759 /* Allocate new domain for the device */
1633 drhd = dmar_find_matched_drhd_unit(pdev); 1760 drhd = dmar_find_matched_drhd_unit(pdev);
1634 if (!drhd) { 1761 if (!drhd) {
@@ -1638,9 +1765,11 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1638 } 1765 }
1639 iommu = drhd->iommu; 1766 iommu = drhd->iommu;
1640 1767
1641 domain = iommu_alloc_domain(iommu); 1768 ret = iommu_attach_domain(domain, iommu);
1642 if (!domain) 1769 if (ret) {
1770 domain_exit(domain);
1643 goto error; 1771 goto error;
1772 }
1644 1773
1645 if (domain_init(domain, gaw)) { 1774 if (domain_init(domain, gaw)) {
1646 domain_exit(domain); 1775 domain_exit(domain);
@@ -1714,6 +1843,8 @@ error:
1714 return find_domain(pdev); 1843 return find_domain(pdev);
1715} 1844}
1716 1845
1846static int iommu_identity_mapping;
1847
1717static int iommu_prepare_identity_map(struct pci_dev *pdev, 1848static int iommu_prepare_identity_map(struct pci_dev *pdev,
1718 unsigned long long start, 1849 unsigned long long start,
1719 unsigned long long end) 1850 unsigned long long end)
@@ -1726,8 +1857,11 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
1726 printk(KERN_INFO 1857 printk(KERN_INFO
1727 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", 1858 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1728 pci_name(pdev), start, end); 1859 pci_name(pdev), start, end);
1729 /* page table init */ 1860 if (iommu_identity_mapping)
1730 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); 1861 domain = si_domain;
1862 else
1863 /* page table init */
1864 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1731 if (!domain) 1865 if (!domain)
1732 return -ENOMEM; 1866 return -ENOMEM;
1733 1867
@@ -1756,7 +1890,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
1756 goto error; 1890 goto error;
1757 1891
1758 /* context entry init */ 1892 /* context entry init */
1759 ret = domain_context_mapping(domain, pdev); 1893 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
1760 if (!ret) 1894 if (!ret)
1761 return 0; 1895 return 0;
1762error: 1896error:
@@ -1857,13 +1991,141 @@ static inline void iommu_prepare_isa(void)
1857} 1991}
1858#endif /* !CONFIG_DMAR_FLPY_WA */ 1992#endif /* !CONFIG_DMAR_FLPY_WA */
1859 1993
1860static int __init init_dmars(void) 1994/* Initialize each context entry as pass through.*/
1995static int __init init_context_pass_through(void)
1996{
1997 struct pci_dev *pdev = NULL;
1998 struct dmar_domain *domain;
1999 int ret;
2000
2001 for_each_pci_dev(pdev) {
2002 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2003 ret = domain_context_mapping(domain, pdev,
2004 CONTEXT_TT_PASS_THROUGH);
2005 if (ret)
2006 return ret;
2007 }
2008 return 0;
2009}
2010
2011static int md_domain_init(struct dmar_domain *domain, int guest_width);
2012static int si_domain_init(void)
2013{
2014 struct dmar_drhd_unit *drhd;
2015 struct intel_iommu *iommu;
2016 int ret = 0;
2017
2018 si_domain = alloc_domain();
2019 if (!si_domain)
2020 return -EFAULT;
2021
2022
2023 for_each_active_iommu(iommu, drhd) {
2024 ret = iommu_attach_domain(si_domain, iommu);
2025 if (ret) {
2026 domain_exit(si_domain);
2027 return -EFAULT;
2028 }
2029 }
2030
2031 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2032 domain_exit(si_domain);
2033 return -EFAULT;
2034 }
2035
2036 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2037
2038 return 0;
2039}
2040
2041static void domain_remove_one_dev_info(struct dmar_domain *domain,
2042 struct pci_dev *pdev);
2043static int identity_mapping(struct pci_dev *pdev)
2044{
2045 struct device_domain_info *info;
2046
2047 if (likely(!iommu_identity_mapping))
2048 return 0;
2049
2050
2051 list_for_each_entry(info, &si_domain->devices, link)
2052 if (info->dev == pdev)
2053 return 1;
2054 return 0;
2055}
2056
2057static int domain_add_dev_info(struct dmar_domain *domain,
2058 struct pci_dev *pdev)
2059{
2060 struct device_domain_info *info;
2061 unsigned long flags;
2062
2063 info = alloc_devinfo_mem();
2064 if (!info)
2065 return -ENOMEM;
2066
2067 info->segment = pci_domain_nr(pdev->bus);
2068 info->bus = pdev->bus->number;
2069 info->devfn = pdev->devfn;
2070 info->dev = pdev;
2071 info->domain = domain;
2072
2073 spin_lock_irqsave(&device_domain_lock, flags);
2074 list_add(&info->link, &domain->devices);
2075 list_add(&info->global, &device_domain_list);
2076 pdev->dev.archdata.iommu = info;
2077 spin_unlock_irqrestore(&device_domain_lock, flags);
2078
2079 return 0;
2080}
2081
2082static int iommu_prepare_static_identity_mapping(void)
2083{
2084 int i;
2085 struct pci_dev *pdev = NULL;
2086 int ret;
2087
2088 ret = si_domain_init();
2089 if (ret)
2090 return -EFAULT;
2091
2092 printk(KERN_INFO "IOMMU: Setting identity map:\n");
2093 for_each_pci_dev(pdev) {
2094 for (i = 0; i < e820.nr_map; i++) {
2095 struct e820entry *ei = &e820.map[i];
2096
2097 if (ei->type == E820_RAM) {
2098 ret = iommu_prepare_identity_map(pdev,
2099 ei->addr, ei->addr + ei->size);
2100 if (ret) {
2101 printk(KERN_INFO "1:1 mapping to one domain failed.\n");
2102 return -EFAULT;
2103 }
2104 }
2105 }
2106 ret = domain_add_dev_info(si_domain, pdev);
2107 if (ret)
2108 return ret;
2109 }
2110
2111 return 0;
2112}
2113
2114int __init init_dmars(void)
1861{ 2115{
1862 struct dmar_drhd_unit *drhd; 2116 struct dmar_drhd_unit *drhd;
1863 struct dmar_rmrr_unit *rmrr; 2117 struct dmar_rmrr_unit *rmrr;
1864 struct pci_dev *pdev; 2118 struct pci_dev *pdev;
1865 struct intel_iommu *iommu; 2119 struct intel_iommu *iommu;
1866 int i, ret; 2120 int i, ret;
2121 int pass_through = 1;
2122
2123 /*
2124 * In case pass through cannot be enabled, the iommu tries to use identity
2125 * mapping.
2126 */
2127 if (iommu_pass_through)
2128 iommu_identity_mapping = 1;
1867 2129
1868 /* 2130 /*
1869 * for each drhd 2131 * for each drhd
@@ -1917,7 +2179,15 @@ static int __init init_dmars(void)
1917 printk(KERN_ERR "IOMMU: allocate root entry failed\n"); 2179 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1918 goto error; 2180 goto error;
1919 } 2181 }
2182 if (!ecap_pass_through(iommu->ecap))
2183 pass_through = 0;
1920 } 2184 }
2185 if (iommu_pass_through)
2186 if (!pass_through) {
2187 printk(KERN_INFO
2188 "Pass Through is not supported by hardware.\n");
2189 iommu_pass_through = 0;
2190 }
1921 2191
1922 /* 2192 /*
1923 * Start from the sane iommu hardware state. 2193 * Start from the sane iommu hardware state.
@@ -1972,45 +2242,61 @@ static int __init init_dmars(void)
1972 } 2242 }
1973 } 2243 }
1974 2244
1975#ifdef CONFIG_INTR_REMAP 2245 /*
1976 if (!intr_remapping_enabled) { 2246 * If pass through is set and enabled, context entries of all pci
1977 ret = enable_intr_remapping(0); 2247 * devices are intialized by pass through translation type.
1978 if (ret) 2248 */
1979 printk(KERN_ERR 2249 if (iommu_pass_through) {
1980 "IOMMU: enable interrupt remapping failed\n"); 2250 ret = init_context_pass_through();
2251 if (ret) {
2252 printk(KERN_ERR "IOMMU: Pass through init failed.\n");
2253 iommu_pass_through = 0;
2254 }
1981 } 2255 }
1982#endif
1983 2256
1984 /* 2257 /*
1985 * For each rmrr 2258 * If pass through is not set or not enabled, setup context entries for
1986 * for each dev attached to rmrr 2259 * identity mappings for rmrr, gfx, and isa and may fall back to static
1987 * do 2260 * identity mapping if iommu_identity_mapping is set.
1988 * locate drhd for dev, alloc domain for dev
1989 * allocate free domain
1990 * allocate page table entries for rmrr
1991 * if context not allocated for bus
1992 * allocate and init context
1993 * set present in root table for this bus
1994 * init context with domain, translation etc
1995 * endfor
1996 * endfor
1997 */ 2261 */
1998 for_each_rmrr_units(rmrr) { 2262 if (!iommu_pass_through) {
1999 for (i = 0; i < rmrr->devices_cnt; i++) { 2263 if (iommu_identity_mapping)
2000 pdev = rmrr->devices[i]; 2264 iommu_prepare_static_identity_mapping();
2001 /* some BIOS lists non-exist devices in DMAR table */ 2265 /*
2002 if (!pdev) 2266 * For each rmrr
2003 continue; 2267 * for each dev attached to rmrr
2004 ret = iommu_prepare_rmrr_dev(rmrr, pdev); 2268 * do
2005 if (ret) 2269 * locate drhd for dev, alloc domain for dev
2006 printk(KERN_ERR 2270 * allocate free domain
2271 * allocate page table entries for rmrr
2272 * if context not allocated for bus
2273 * allocate and init context
2274 * set present in root table for this bus
2275 * init context with domain, translation etc
2276 * endfor
2277 * endfor
2278 */
2279 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2280 for_each_rmrr_units(rmrr) {
2281 for (i = 0; i < rmrr->devices_cnt; i++) {
2282 pdev = rmrr->devices[i];
2283 /*
2284 * some BIOS lists non-exist devices in DMAR
2285 * table.
2286 */
2287 if (!pdev)
2288 continue;
2289 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2290 if (ret)
2291 printk(KERN_ERR
2007 "IOMMU: mapping reserved region failed\n"); 2292 "IOMMU: mapping reserved region failed\n");
2293 }
2008 } 2294 }
2009 }
2010 2295
2011 iommu_prepare_gfx_mapping(); 2296 iommu_prepare_gfx_mapping();
2012 2297
2013 iommu_prepare_isa(); 2298 iommu_prepare_isa();
2299 }
2014 2300
2015 /* 2301 /*
2016 * for each drhd 2302 * for each drhd
@@ -2032,10 +2318,8 @@ static int __init init_dmars(void)
2032 2318
2033 iommu_set_root_entry(iommu); 2319 iommu_set_root_entry(iommu);
2034 2320
2035 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, 2321 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2036 0); 2322 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2037 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2038 0);
2039 iommu_disable_protect_mem_regions(iommu); 2323 iommu_disable_protect_mem_regions(iommu);
2040 2324
2041 ret = iommu_enable_translation(iommu); 2325 ret = iommu_enable_translation(iommu);
@@ -2121,7 +2405,8 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
2121 2405
2122 /* make sure context mapping is ok */ 2406 /* make sure context mapping is ok */
2123 if (unlikely(!domain_context_mapped(pdev))) { 2407 if (unlikely(!domain_context_mapped(pdev))) {
2124 ret = domain_context_mapping(domain, pdev); 2408 ret = domain_context_mapping(domain, pdev,
2409 CONTEXT_TT_MULTI_LEVEL);
2125 if (ret) { 2410 if (ret) {
2126 printk(KERN_ERR 2411 printk(KERN_ERR
2127 "Domain context map for %s failed", 2412 "Domain context map for %s failed",
@@ -2133,6 +2418,52 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
2133 return domain; 2418 return domain;
2134} 2419}
2135 2420
2421static int iommu_dummy(struct pci_dev *pdev)
2422{
2423 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2424}
2425
2426/* Check if the pdev needs to go through non-identity map and unmap process.*/
2427static int iommu_no_mapping(struct pci_dev *pdev)
2428{
2429 int found;
2430
2431 if (!iommu_identity_mapping)
2432 return iommu_dummy(pdev);
2433
2434 found = identity_mapping(pdev);
2435 if (found) {
2436 if (pdev->dma_mask > DMA_BIT_MASK(32))
2437 return 1;
2438 else {
2439 /*
2440 * 32 bit DMA is removed from si_domain and falls back
2441 * to non-identity mapping.
2442 */
2443 domain_remove_one_dev_info(si_domain, pdev);
2444 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2445 pci_name(pdev));
2446 return 0;
2447 }
2448 } else {
2449 /*
2450 * In case of a detached 64 bit DMA device from vm, the device
2451 * is put into si_domain for identity mapping.
2452 */
2453 if (pdev->dma_mask > DMA_BIT_MASK(32)) {
2454 int ret;
2455 ret = domain_add_dev_info(si_domain, pdev);
2456 if (!ret) {
2457 printk(KERN_INFO "64bit %s uses identity mapping\n",
2458 pci_name(pdev));
2459 return 1;
2460 }
2461 }
2462 }
2463
2464 return iommu_dummy(pdev);
2465}
2466
2136static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, 2467static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2137 size_t size, int dir, u64 dma_mask) 2468 size_t size, int dir, u64 dma_mask)
2138{ 2469{
@@ -2145,7 +2476,8 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2145 struct intel_iommu *iommu; 2476 struct intel_iommu *iommu;
2146 2477
2147 BUG_ON(dir == DMA_NONE); 2478 BUG_ON(dir == DMA_NONE);
2148 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2479
2480 if (iommu_no_mapping(pdev))
2149 return paddr; 2481 return paddr;
2150 2482
2151 domain = get_valid_domain_for_dev(pdev); 2483 domain = get_valid_domain_for_dev(pdev);
@@ -2182,10 +2514,11 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2182 if (ret) 2514 if (ret)
2183 goto error; 2515 goto error;
2184 2516
2185 /* it's a non-present to present mapping */ 2517 /* it's a non-present to present mapping. Only flush if caching mode */
2186 ret = iommu_flush_iotlb_psi(iommu, domain->id, 2518 if (cap_caching_mode(iommu->cap))
2187 start_paddr, size >> VTD_PAGE_SHIFT, 1); 2519 iommu_flush_iotlb_psi(iommu, 0, start_paddr,
2188 if (ret) 2520 size >> VTD_PAGE_SHIFT);
2521 else
2189 iommu_flush_write_buffer(iommu); 2522 iommu_flush_write_buffer(iommu);
2190 2523
2191 return start_paddr + ((u64)paddr & (~PAGE_MASK)); 2524 return start_paddr + ((u64)paddr & (~PAGE_MASK));
@@ -2219,15 +2552,22 @@ static void flush_unmaps(void)
2219 if (!iommu) 2552 if (!iommu)
2220 continue; 2553 continue;
2221 2554
2222 if (deferred_flush[i].next) { 2555 if (!deferred_flush[i].next)
2223 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2556 continue;
2224 DMA_TLB_GLOBAL_FLUSH, 0); 2557
2225 for (j = 0; j < deferred_flush[i].next; j++) { 2558 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2226 __free_iova(&deferred_flush[i].domain[j]->iovad, 2559 DMA_TLB_GLOBAL_FLUSH);
2227 deferred_flush[i].iova[j]); 2560 for (j = 0; j < deferred_flush[i].next; j++) {
2228 } 2561 unsigned long mask;
2229 deferred_flush[i].next = 0; 2562 struct iova *iova = deferred_flush[i].iova[j];
2563
2564 mask = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT;
2565 mask = ilog2(mask >> VTD_PAGE_SHIFT);
2566 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2567 iova->pfn_lo << PAGE_SHIFT, mask);
2568 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2230 } 2569 }
2570 deferred_flush[i].next = 0;
2231 } 2571 }
2232 2572
2233 list_size = 0; 2573 list_size = 0;
@@ -2278,8 +2618,9 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2278 struct iova *iova; 2618 struct iova *iova;
2279 struct intel_iommu *iommu; 2619 struct intel_iommu *iommu;
2280 2620
2281 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2621 if (iommu_no_mapping(pdev))
2282 return; 2622 return;
2623
2283 domain = find_domain(pdev); 2624 domain = find_domain(pdev);
2284 BUG_ON(!domain); 2625 BUG_ON(!domain);
2285 2626
@@ -2300,9 +2641,8 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2300 /* free page tables */ 2641 /* free page tables */
2301 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2642 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2302 if (intel_iommu_strict) { 2643 if (intel_iommu_strict) {
2303 if (iommu_flush_iotlb_psi(iommu, 2644 iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2304 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) 2645 size >> VTD_PAGE_SHIFT);
2305 iommu_flush_write_buffer(iommu);
2306 /* free iova */ 2646 /* free iova */
2307 __free_iova(&domain->iovad, iova); 2647 __free_iova(&domain->iovad, iova);
2308 } else { 2648 } else {
@@ -2370,7 +2710,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2370 struct scatterlist *sg; 2710 struct scatterlist *sg;
2371 struct intel_iommu *iommu; 2711 struct intel_iommu *iommu;
2372 2712
2373 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2713 if (iommu_no_mapping(pdev))
2374 return; 2714 return;
2375 2715
2376 domain = find_domain(pdev); 2716 domain = find_domain(pdev);
@@ -2393,9 +2733,8 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2393 /* free page tables */ 2733 /* free page tables */
2394 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2734 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2395 2735
2396 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2736 iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2397 size >> VTD_PAGE_SHIFT, 0)) 2737 size >> VTD_PAGE_SHIFT);
2398 iommu_flush_write_buffer(iommu);
2399 2738
2400 /* free iova */ 2739 /* free iova */
2401 __free_iova(&domain->iovad, iova); 2740 __free_iova(&domain->iovad, iova);
@@ -2432,7 +2771,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2432 struct intel_iommu *iommu; 2771 struct intel_iommu *iommu;
2433 2772
2434 BUG_ON(dir == DMA_NONE); 2773 BUG_ON(dir == DMA_NONE);
2435 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2774 if (iommu_no_mapping(pdev))
2436 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); 2775 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2437 2776
2438 domain = get_valid_domain_for_dev(pdev); 2777 domain = get_valid_domain_for_dev(pdev);
@@ -2487,10 +2826,13 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2487 offset += size; 2826 offset += size;
2488 } 2827 }
2489 2828
2490 /* it's a non-present to present mapping */ 2829 /* it's a non-present to present mapping. Only flush if caching mode */
2491 if (iommu_flush_iotlb_psi(iommu, domain->id, 2830 if (cap_caching_mode(iommu->cap))
2492 start_addr, offset >> VTD_PAGE_SHIFT, 1)) 2831 iommu_flush_iotlb_psi(iommu, 0, start_addr,
2832 offset >> VTD_PAGE_SHIFT);
2833 else
2493 iommu_flush_write_buffer(iommu); 2834 iommu_flush_write_buffer(iommu);
2835
2494 return nelems; 2836 return nelems;
2495} 2837}
2496 2838
@@ -2649,9 +2991,9 @@ static int init_iommu_hw(void)
2649 iommu_set_root_entry(iommu); 2991 iommu_set_root_entry(iommu);
2650 2992
2651 iommu->flush.flush_context(iommu, 0, 0, 0, 2993 iommu->flush.flush_context(iommu, 0, 0, 0,
2652 DMA_CCMD_GLOBAL_INVL, 0); 2994 DMA_CCMD_GLOBAL_INVL);
2653 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2995 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2654 DMA_TLB_GLOBAL_FLUSH, 0); 2996 DMA_TLB_GLOBAL_FLUSH);
2655 iommu_disable_protect_mem_regions(iommu); 2997 iommu_disable_protect_mem_regions(iommu);
2656 iommu_enable_translation(iommu); 2998 iommu_enable_translation(iommu);
2657 } 2999 }
@@ -2666,9 +3008,9 @@ static void iommu_flush_all(void)
2666 3008
2667 for_each_active_iommu(iommu, drhd) { 3009 for_each_active_iommu(iommu, drhd) {
2668 iommu->flush.flush_context(iommu, 0, 0, 0, 3010 iommu->flush.flush_context(iommu, 0, 0, 0,
2669 DMA_CCMD_GLOBAL_INVL, 0); 3011 DMA_CCMD_GLOBAL_INVL);
2670 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 3012 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2671 DMA_TLB_GLOBAL_FLUSH, 0); 3013 DMA_TLB_GLOBAL_FLUSH);
2672 } 3014 }
2673} 3015}
2674 3016
@@ -2791,7 +3133,7 @@ int __init intel_iommu_init(void)
2791 * Check the need for DMA-remapping initialization now. 3133 * Check the need for DMA-remapping initialization now.
2792 * Above initialization will also be used by Interrupt-remapping. 3134 * Above initialization will also be used by Interrupt-remapping.
2793 */ 3135 */
2794 if (no_iommu || swiotlb || dmar_disabled) 3136 if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled)
2795 return -ENODEV; 3137 return -ENODEV;
2796 3138
2797 iommu_init_mempool(); 3139 iommu_init_mempool();
@@ -2811,35 +3153,18 @@ int __init intel_iommu_init(void)
2811 3153
2812 init_timer(&unmap_timer); 3154 init_timer(&unmap_timer);
2813 force_iommu = 1; 3155 force_iommu = 1;
2814 dma_ops = &intel_dma_ops;
2815 init_iommu_sysfs();
2816
2817 register_iommu(&intel_iommu_ops);
2818 3156
2819 return 0; 3157 if (!iommu_pass_through) {
2820} 3158 printk(KERN_INFO
3159 "Multi-level page-table translation for DMAR.\n");
3160 dma_ops = &intel_dma_ops;
3161 } else
3162 printk(KERN_INFO
3163 "DMAR: Pass through translation for DMAR.\n");
2821 3164
2822static int vm_domain_add_dev_info(struct dmar_domain *domain, 3165 init_iommu_sysfs();
2823 struct pci_dev *pdev)
2824{
2825 struct device_domain_info *info;
2826 unsigned long flags;
2827
2828 info = alloc_devinfo_mem();
2829 if (!info)
2830 return -ENOMEM;
2831
2832 info->segment = pci_domain_nr(pdev->bus);
2833 info->bus = pdev->bus->number;
2834 info->devfn = pdev->devfn;
2835 info->dev = pdev;
2836 info->domain = domain;
2837 3166
2838 spin_lock_irqsave(&device_domain_lock, flags); 3167 register_iommu(&intel_iommu_ops);
2839 list_add(&info->link, &domain->devices);
2840 list_add(&info->global, &device_domain_list);
2841 pdev->dev.archdata.iommu = info;
2842 spin_unlock_irqrestore(&device_domain_lock, flags);
2843 3168
2844 return 0; 3169 return 0;
2845} 3170}
@@ -2871,7 +3196,7 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
2871 } 3196 }
2872} 3197}
2873 3198
2874static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, 3199static void domain_remove_one_dev_info(struct dmar_domain *domain,
2875 struct pci_dev *pdev) 3200 struct pci_dev *pdev)
2876{ 3201{
2877 struct device_domain_info *info; 3202 struct device_domain_info *info;
@@ -2897,6 +3222,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2897 info->dev->dev.archdata.iommu = NULL; 3222 info->dev->dev.archdata.iommu = NULL;
2898 spin_unlock_irqrestore(&device_domain_lock, flags); 3223 spin_unlock_irqrestore(&device_domain_lock, flags);
2899 3224
3225 iommu_disable_dev_iotlb(info);
2900 iommu_detach_dev(iommu, info->bus, info->devfn); 3226 iommu_detach_dev(iommu, info->bus, info->devfn);
2901 iommu_detach_dependent_devices(iommu, pdev); 3227 iommu_detach_dependent_devices(iommu, pdev);
2902 free_devinfo_mem(info); 3228 free_devinfo_mem(info);
@@ -2947,6 +3273,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2947 3273
2948 spin_unlock_irqrestore(&device_domain_lock, flags1); 3274 spin_unlock_irqrestore(&device_domain_lock, flags1);
2949 3275
3276 iommu_disable_dev_iotlb(info);
2950 iommu = device_to_iommu(info->segment, info->bus, info->devfn); 3277 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
2951 iommu_detach_dev(iommu, info->bus, info->devfn); 3278 iommu_detach_dev(iommu, info->bus, info->devfn);
2952 iommu_detach_dependent_devices(iommu, info->dev); 3279 iommu_detach_dependent_devices(iommu, info->dev);
@@ -3002,7 +3329,7 @@ static struct dmar_domain *iommu_alloc_vm_domain(void)
3002 return domain; 3329 return domain;
3003} 3330}
3004 3331
3005static int vm_domain_init(struct dmar_domain *domain, int guest_width) 3332static int md_domain_init(struct dmar_domain *domain, int guest_width)
3006{ 3333{
3007 int adjust_width; 3334 int adjust_width;
3008 3335
@@ -3093,7 +3420,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain)
3093 "intel_iommu_domain_init: dmar_domain == NULL\n"); 3420 "intel_iommu_domain_init: dmar_domain == NULL\n");
3094 return -ENOMEM; 3421 return -ENOMEM;
3095 } 3422 }
3096 if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 3423 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3097 printk(KERN_ERR 3424 printk(KERN_ERR
3098 "intel_iommu_domain_init() failed\n"); 3425 "intel_iommu_domain_init() failed\n");
3099 vm_domain_exit(dmar_domain); 3426 vm_domain_exit(dmar_domain);
@@ -3128,8 +3455,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
3128 3455
3129 old_domain = find_domain(pdev); 3456 old_domain = find_domain(pdev);
3130 if (old_domain) { 3457 if (old_domain) {
3131 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) 3458 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3132 vm_domain_remove_one_dev_info(old_domain, pdev); 3459 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3460 domain_remove_one_dev_info(old_domain, pdev);
3133 else 3461 else
3134 domain_remove_dev_info(old_domain); 3462 domain_remove_dev_info(old_domain);
3135 } 3463 }
@@ -3151,11 +3479,11 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
3151 return -EFAULT; 3479 return -EFAULT;
3152 } 3480 }
3153 3481
3154 ret = domain_context_mapping(dmar_domain, pdev); 3482 ret = domain_add_dev_info(dmar_domain, pdev);
3155 if (ret) 3483 if (ret)
3156 return ret; 3484 return ret;
3157 3485
3158 ret = vm_domain_add_dev_info(dmar_domain, pdev); 3486 ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3159 return ret; 3487 return ret;
3160} 3488}
3161 3489
@@ -3165,7 +3493,7 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
3165 struct dmar_domain *dmar_domain = domain->priv; 3493 struct dmar_domain *dmar_domain = domain->priv;
3166 struct pci_dev *pdev = to_pci_dev(dev); 3494 struct pci_dev *pdev = to_pci_dev(dev);
3167 3495
3168 vm_domain_remove_one_dev_info(dmar_domain, pdev); 3496 domain_remove_one_dev_info(dmar_domain, pdev);
3169} 3497}
3170 3498
3171static int intel_iommu_map_range(struct iommu_domain *domain, 3499static int intel_iommu_map_range(struct iommu_domain *domain,
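
Another change that runs through most of the hunks above: iommu->flush.flush_context() and iommu->flush.flush_iotlb() drop the non_present_entry_flush argument and no longer report whether a write-buffer flush is still needed. The mapping paths now check caching mode themselves and either flush the IOTLB (against domain 0) or just the write buffer. The standalone C sketch below models only that caller-side decision; the toy_* names are simplified stand-ins, not the kernel's types or flush API.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for struct intel_iommu. */
struct toy_iommu {
	bool caching_mode;	/* models cap_caching_mode(iommu->cap) */
};

static bool toy_cap_caching_mode(const struct toy_iommu *iommu)
{
	return iommu->caching_mode;
}

/* Stubs standing in for the now-void flush callbacks and the write-buffer flush. */
static void toy_flush_iotlb_psi(struct toy_iommu *iommu, unsigned long start, unsigned long npages)
{
	(void)iommu;
	printf("IOTLB flush of %lu page(s) at %#lx (domain-id 0)\n", npages, start);
}

static void toy_flush_write_buffer(struct toy_iommu *iommu)
{
	(void)iommu;
	printf("write-buffer flush only\n");
}

/*
 * Caller-side pattern from __intel_map_single()/intel_map_sg() in this diff:
 * only a caching-mode IOMMU caches not-present entries, so only then does a
 * non-present -> present mapping need an IOTLB flush.
 */
static void toy_after_new_mapping(struct toy_iommu *iommu, unsigned long start, unsigned long npages)
{
	if (toy_cap_caching_mode(iommu))
		toy_flush_iotlb_psi(iommu, start, npages);
	else
		toy_flush_write_buffer(iommu);
}

int main(void)
{
	struct toy_iommu bare_metal = { .caching_mode = false };
	struct toy_iommu emulated   = { .caching_mode = true };	/* e.g. VT-d emulated for a guest */

	toy_after_new_mapping(&bare_metal, 0x100000UL, 4);
	toy_after_new_mapping(&emulated, 0x100000UL, 4);
	return 0;
}
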