Diffstat
-rw-r--r--  arch/ia64/Kconfig                              |    3
-rw-r--r--  arch/ia64/include/asm/kvm_host.h               |    2
-rw-r--r--  arch/ia64/kvm/Makefile                         |    4
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c                       |    3
-rw-r--r--  arch/x86/Kconfig                               |   13
-rw-r--r--  arch/x86/include/asm/amd_iommu_types.h         |   61
-rw-r--r--  arch/x86/include/asm/kvm_host.h                |    2
-rw-r--r--  arch/x86/kernel/amd_iommu.c                    |  664
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c               |   15
-rw-r--r--  arch/x86/kvm/Makefile                          |    4
-rw-r--r--  arch/x86/kvm/x86.c                             |    3
-rw-r--r--  drivers/base/Makefile                          |    1
-rw-r--r--  drivers/base/iommu.c                           |  100
-rw-r--r--  drivers/pci/dmar.c                             |   46
-rw-r--r--  drivers/pci/intel-iommu.c                      |  944
-rw-r--r--  include/linux/dma_remapping.h                  |  138
-rw-r--r--  include/linux/dmar.h                           |    1
-rw-r--r--  include/linux/intel-iommu.h                    |   25
-rw-r--r--  include/linux/iommu.h                          |  112
-rw-r--r--  include/linux/kvm_host.h                       |   30
-rw-r--r--  virt/kvm/iommu.c (renamed from virt/kvm/vtd.c) |  135
-rw-r--r--  virt/kvm/kvm_main.c                            |   50
22 files changed, 1893 insertions(+), 463 deletions(-)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 7fa8f615ba6e..3d31636cbafb 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -687,3 +687,6 @@ config IRQ_PER_CPU
687 687
688config IOMMU_HELPER 688config IOMMU_HELPER
689 def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB) 689 def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
690
691config IOMMU_API
692 def_bool (DMAR)
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 0560f3fae538..348663661659 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -467,7 +467,7 @@ struct kvm_arch {
467 struct kvm_sal_data rdv_sal_data; 467 struct kvm_sal_data rdv_sal_data;
468 468
469 struct list_head assigned_dev_head; 469 struct list_head assigned_dev_head;
470 struct dmar_domain *intel_iommu_domain; 470 struct iommu_domain *iommu_domain;
471 struct hlist_head irq_ack_notifier_list; 471 struct hlist_head irq_ack_notifier_list;
472 472
473 unsigned long irq_sources_bitmap; 473 unsigned long irq_sources_bitmap;
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 76464dc312e6..0bb99b732908 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -51,8 +51,8 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
51common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ 51common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
52 coalesced_mmio.o irq_comm.o) 52 coalesced_mmio.o irq_comm.o)
53 53
54ifeq ($(CONFIG_DMAR),y) 54ifeq ($(CONFIG_IOMMU_API),y)
55common-objs += $(addprefix ../../../virt/kvm/, vtd.o) 55common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
56endif 56endif
57 57
58kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o 58kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0f5ebd948437..4e586f6110aa 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -31,6 +31,7 @@
31#include <linux/bitops.h> 31#include <linux/bitops.h>
32#include <linux/hrtimer.h> 32#include <linux/hrtimer.h>
33#include <linux/uaccess.h> 33#include <linux/uaccess.h>
34#include <linux/iommu.h>
34#include <linux/intel-iommu.h> 35#include <linux/intel-iommu.h>
35 36
36#include <asm/pgtable.h> 37#include <asm/pgtable.h>
@@ -188,7 +189,7 @@ int kvm_dev_ioctl_check_extension(long ext)
188 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 189 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
189 break; 190 break;
190 case KVM_CAP_IOMMU: 191 case KVM_CAP_IOMMU:
191 r = intel_iommu_found(); 192 r = iommu_found();
192 break; 193 break;
193 default: 194 default:
194 r = 0; 195 r = 0;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 249d1e0824b5..862adb9bf0d4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -586,6 +586,16 @@ config AMD_IOMMU
586 your BIOS for an option to enable it or if you have an IVRS ACPI 586 your BIOS for an option to enable it or if you have an IVRS ACPI
587 table. 587 table.
588 588
589config AMD_IOMMU_STATS
590 bool "Export AMD IOMMU statistics to debugfs"
591 depends on AMD_IOMMU
592 select DEBUG_FS
593 help
594 This option enables code in the AMD IOMMU driver to collect various
595 statistics about what's happening in the driver and exports that
596 information to userspace via debugfs.
597 If unsure, say N.
598
589# need this always selected by IOMMU for the VIA workaround 599# need this always selected by IOMMU for the VIA workaround
590config SWIOTLB 600config SWIOTLB
591 def_bool y if X86_64 601 def_bool y if X86_64
@@ -599,6 +609,9 @@ config SWIOTLB
599config IOMMU_HELPER 609config IOMMU_HELPER
600 def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) 610 def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
601 611
612config IOMMU_API
613 def_bool (AMD_IOMMU || DMAR)
614
602config MAXSMP 615config MAXSMP
603 bool "Configure Maximum number of SMP Processors and NUMA Nodes" 616 bool "Configure Maximum number of SMP Processors and NUMA Nodes"
604 depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL 617 depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index ac302a2fa339..95c8cd9d22b5 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -190,16 +190,23 @@
190/* FIXME: move this macro to <linux/pci.h> */ 190/* FIXME: move this macro to <linux/pci.h> */
191#define PCI_BUS(x) (((x) >> 8) & 0xff) 191#define PCI_BUS(x) (((x) >> 8) & 0xff)
192 192
193/* Protection domain flags */
194#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
195#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
196 domain for an IOMMU */
197
193/* 198/*
194 * This structure contains generic data for IOMMU protection domains 199 * This structure contains generic data for IOMMU protection domains
195 * independent of their use. 200 * independent of their use.
196 */ 201 */
197struct protection_domain { 202struct protection_domain {
198 spinlock_t lock; /* mostly used to lock the page table*/ 203 spinlock_t lock; /* mostly used to lock the page table*/
199 u16 id; /* the domain id written to the device table */ 204 u16 id; /* the domain id written to the device table */
200 int mode; /* paging mode (0-6 levels) */ 205 int mode; /* paging mode (0-6 levels) */
201 u64 *pt_root; /* page table root pointer */ 206 u64 *pt_root; /* page table root pointer */
202 void *priv; /* private data */ 207 unsigned long flags; /* flags to find out type of domain */
208 unsigned dev_cnt; /* devices assigned to this domain */
209 void *priv; /* private data */
203}; 210};
204 211
205/* 212/*
@@ -295,7 +302,7 @@ struct amd_iommu {
295 bool int_enabled; 302 bool int_enabled;
296 303
297 /* if one, we need to send a completion wait command */ 304 /* if one, we need to send a completion wait command */
298 int need_sync; 305 bool need_sync;
299 306
300 /* default dma_ops domain for that IOMMU */ 307 /* default dma_ops domain for that IOMMU */
301 struct dma_ops_domain *default_dom; 308 struct dma_ops_domain *default_dom;
@@ -374,7 +381,7 @@ extern struct protection_domain **amd_iommu_pd_table;
374extern unsigned long *amd_iommu_pd_alloc_bitmap; 381extern unsigned long *amd_iommu_pd_alloc_bitmap;
375 382
376/* will be 1 if device isolation is enabled */ 383/* will be 1 if device isolation is enabled */
377extern int amd_iommu_isolate; 384extern bool amd_iommu_isolate;
378 385
379/* 386/*
380 * If true, the addresses will be flushed on unmap time, not when 387 * If true, the addresses will be flushed on unmap time, not when
@@ -382,18 +389,6 @@ extern int amd_iommu_isolate;
382 */ 389 */
383extern bool amd_iommu_unmap_flush; 390extern bool amd_iommu_unmap_flush;
384 391
385/* takes a PCI device id and prints it out in a readable form */
386static inline void print_devid(u16 devid, int nl)
387{
388 int bus = devid >> 8;
389 int dev = devid >> 3 & 0x1f;
390 int fn = devid & 0x07;
391
392 printk("%02x:%02x.%x", bus, dev, fn);
393 if (nl)
394 printk("\n");
395}
396
397/* takes bus and device/function and returns the device id 392/* takes bus and device/function and returns the device id
398 * FIXME: should that be in generic PCI code? */ 393 * FIXME: should that be in generic PCI code? */
399static inline u16 calc_devid(u8 bus, u8 devfn) 394static inline u16 calc_devid(u8 bus, u8 devfn)
@@ -401,4 +396,32 @@ static inline u16 calc_devid(u8 bus, u8 devfn)
401 return (((u16)bus) << 8) | devfn; 396 return (((u16)bus) << 8) | devfn;
402} 397}
403 398
399#ifdef CONFIG_AMD_IOMMU_STATS
400
401struct __iommu_counter {
402 char *name;
403 struct dentry *dent;
404 u64 value;
405};
406
407#define DECLARE_STATS_COUNTER(nm) \
408 static struct __iommu_counter nm = { \
409 .name = #nm, \
410 }
411
412#define INC_STATS_COUNTER(name) name.value += 1
413#define ADD_STATS_COUNTER(name, x) name.value += (x)
414#define SUB_STATS_COUNTER(name, x) name.value -= (x)
415
416#else /* CONFIG_AMD_IOMMU_STATS */
417
418#define DECLARE_STATS_COUNTER(name)
419#define INC_STATS_COUNTER(name)
420#define ADD_STATS_COUNTER(name, x)
421#define SUB_STATS_COUNTER(name, x)
422
423static inline void amd_iommu_stats_init(void) { }
424
425#endif /* CONFIG_AMD_IOMMU_STATS */
426
404#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ 427#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
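
The DECLARE_STATS_COUNTER()/INC_STATS_COUNTER() macros above compile to nothing when CONFIG_AMD_IOMMU_STATS is disabled, so counters can sit in fast paths at no cost. The sketch below shows the pattern as it is used inside arch/x86/kernel/amd_iommu.c further down in this patch; it is illustrative only, the counter and function names here are made up and are not part of the patch.

/*
 * Sketch, not part of the patch: adding one more statistics counter inside
 * arch/x86/kernel/amd_iommu.c. Everything below sits under the same
 * CONFIG_AMD_IOMMU_STATS #ifdef as the counters the patch itself adds.
 */
DECLARE_STATS_COUNTER(cnt_example_event);	/* plain u64, no locking */

static void example_fast_path(void)
{
	INC_STATS_COUNTER(cnt_example_event);	/* no-op when stats are off */
}

/*
 * One extra amd_iommu_stats_add() call in amd_iommu_stats_init() exports the
 * counter read-only via debugfs_create_u64(), typically visible as
 * /sys/kernel/debug/amd-iommu/cnt_example_event.
 */
static void example_register_counter(void)
{
	amd_iommu_stats_add(&cnt_example_event);
}
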
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 97215a458e5f..730843d1d2fb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -360,7 +360,7 @@ struct kvm_arch{
360 struct list_head active_mmu_pages; 360 struct list_head active_mmu_pages;
361 struct list_head assigned_dev_head; 361 struct list_head assigned_dev_head;
362 struct list_head oos_global_pages; 362 struct list_head oos_global_pages;
363 struct dmar_domain *intel_iommu_domain; 363 struct iommu_domain *iommu_domain;
364 struct kvm_pic *vpic; 364 struct kvm_pic *vpic;
365 struct kvm_ioapic *vioapic; 365 struct kvm_ioapic *vioapic;
366 struct kvm_pit *vpit; 366 struct kvm_pit *vpit;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 658e29e0f49b..5113c080f0c4 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -20,8 +20,12 @@
20#include <linux/pci.h> 20#include <linux/pci.h>
21#include <linux/gfp.h> 21#include <linux/gfp.h>
22#include <linux/bitops.h> 22#include <linux/bitops.h>
23#include <linux/debugfs.h>
23#include <linux/scatterlist.h> 24#include <linux/scatterlist.h>
24#include <linux/iommu-helper.h> 25#include <linux/iommu-helper.h>
26#ifdef CONFIG_IOMMU_API
27#include <linux/iommu.h>
28#endif
25#include <asm/proto.h> 29#include <asm/proto.h>
26#include <asm/iommu.h> 30#include <asm/iommu.h>
27#include <asm/gart.h> 31#include <asm/gart.h>
@@ -38,6 +42,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
38static LIST_HEAD(iommu_pd_list); 42static LIST_HEAD(iommu_pd_list);
39static DEFINE_SPINLOCK(iommu_pd_list_lock); 43static DEFINE_SPINLOCK(iommu_pd_list_lock);
40 44
45#ifdef CONFIG_IOMMU_API
46static struct iommu_ops amd_iommu_ops;
47#endif
48
41/* 49/*
42 * general struct to manage commands send to an IOMMU 50 * general struct to manage commands send to an IOMMU
43 */ 51 */
@@ -47,6 +55,68 @@ struct iommu_cmd {
47 55
48static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, 56static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
49 struct unity_map_entry *e); 57 struct unity_map_entry *e);
58static struct dma_ops_domain *find_protection_domain(u16 devid);
59
60
61#ifdef CONFIG_AMD_IOMMU_STATS
62
63/*
64 * Initialization code for statistics collection
65 */
66
67DECLARE_STATS_COUNTER(compl_wait);
68DECLARE_STATS_COUNTER(cnt_map_single);
69DECLARE_STATS_COUNTER(cnt_unmap_single);
70DECLARE_STATS_COUNTER(cnt_map_sg);
71DECLARE_STATS_COUNTER(cnt_unmap_sg);
72DECLARE_STATS_COUNTER(cnt_alloc_coherent);
73DECLARE_STATS_COUNTER(cnt_free_coherent);
74DECLARE_STATS_COUNTER(cross_page);
75DECLARE_STATS_COUNTER(domain_flush_single);
76DECLARE_STATS_COUNTER(domain_flush_all);
77DECLARE_STATS_COUNTER(alloced_io_mem);
78DECLARE_STATS_COUNTER(total_map_requests);
79
80static struct dentry *stats_dir;
81static struct dentry *de_isolate;
82static struct dentry *de_fflush;
83
84static void amd_iommu_stats_add(struct __iommu_counter *cnt)
85{
86 if (stats_dir == NULL)
87 return;
88
89 cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
90 &cnt->value);
91}
92
93static void amd_iommu_stats_init(void)
94{
95 stats_dir = debugfs_create_dir("amd-iommu", NULL);
96 if (stats_dir == NULL)
97 return;
98
99 de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
100 (u32 *)&amd_iommu_isolate);
101
102 de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
103 (u32 *)&amd_iommu_unmap_flush);
104
105 amd_iommu_stats_add(&compl_wait);
106 amd_iommu_stats_add(&cnt_map_single);
107 amd_iommu_stats_add(&cnt_unmap_single);
108 amd_iommu_stats_add(&cnt_map_sg);
109 amd_iommu_stats_add(&cnt_unmap_sg);
110 amd_iommu_stats_add(&cnt_alloc_coherent);
111 amd_iommu_stats_add(&cnt_free_coherent);
112 amd_iommu_stats_add(&cross_page);
113 amd_iommu_stats_add(&domain_flush_single);
114 amd_iommu_stats_add(&domain_flush_all);
115 amd_iommu_stats_add(&alloced_io_mem);
116 amd_iommu_stats_add(&total_map_requests);
117}
118
119#endif
50 120
51/* returns !0 if the IOMMU is caching non-present entries in its TLB */ 121/* returns !0 if the IOMMU is caching non-present entries in its TLB */
52static int iommu_has_npcache(struct amd_iommu *iommu) 122static int iommu_has_npcache(struct amd_iommu *iommu)
@@ -189,13 +259,55 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
189 spin_lock_irqsave(&iommu->lock, flags); 259 spin_lock_irqsave(&iommu->lock, flags);
190 ret = __iommu_queue_command(iommu, cmd); 260 ret = __iommu_queue_command(iommu, cmd);
191 if (!ret) 261 if (!ret)
192 iommu->need_sync = 1; 262 iommu->need_sync = true;
193 spin_unlock_irqrestore(&iommu->lock, flags); 263 spin_unlock_irqrestore(&iommu->lock, flags);
194 264
195 return ret; 265 return ret;
196} 266}
197 267
198/* 268/*
269 * This function waits until an IOMMU has completed a completion
270 * wait command
271 */
272static void __iommu_wait_for_completion(struct amd_iommu *iommu)
273{
274 int ready = 0;
275 unsigned status = 0;
276 unsigned long i = 0;
277
278 INC_STATS_COUNTER(compl_wait);
279
280 while (!ready && (i < EXIT_LOOP_COUNT)) {
281 ++i;
282 /* wait for the bit to become one */
283 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
284 ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
285 }
286
287 /* set bit back to zero */
288 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
289 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
290
291 if (unlikely(i == EXIT_LOOP_COUNT))
292 panic("AMD IOMMU: Completion wait loop failed\n");
293}
294
295/*
296 * This function queues a completion wait command into the command
297 * buffer of an IOMMU
298 */
299static int __iommu_completion_wait(struct amd_iommu *iommu)
300{
301 struct iommu_cmd cmd;
302
303 memset(&cmd, 0, sizeof(cmd));
304 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
305 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
306
307 return __iommu_queue_command(iommu, &cmd);
308}
309
310/*
199 * This function is called whenever we need to ensure that the IOMMU has 311 * This function is called whenever we need to ensure that the IOMMU has
200 * completed execution of all commands we sent. It sends a 312 * completed execution of all commands we sent. It sends a
201 * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs 313 * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
@@ -204,40 +316,22 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
204 */ 316 */
205static int iommu_completion_wait(struct amd_iommu *iommu) 317static int iommu_completion_wait(struct amd_iommu *iommu)
206{ 318{
207 int ret = 0, ready = 0; 319 int ret = 0;
208 unsigned status = 0; 320 unsigned long flags;
209 struct iommu_cmd cmd;
210 unsigned long flags, i = 0;
211
212 memset(&cmd, 0, sizeof(cmd));
213 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
214 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
215 321
216 spin_lock_irqsave(&iommu->lock, flags); 322 spin_lock_irqsave(&iommu->lock, flags);
217 323
218 if (!iommu->need_sync) 324 if (!iommu->need_sync)
219 goto out; 325 goto out;
220 326
221 iommu->need_sync = 0; 327 ret = __iommu_completion_wait(iommu);
222 328
223 ret = __iommu_queue_command(iommu, &cmd); 329 iommu->need_sync = false;
224 330
225 if (ret) 331 if (ret)
226 goto out; 332 goto out;
227 333
228 while (!ready && (i < EXIT_LOOP_COUNT)) { 334 __iommu_wait_for_completion(iommu);
229 ++i;
230 /* wait for the bit to become one */
231 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
232 ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
233 }
234
235 /* set bit back to zero */
236 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
237 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
238
239 if (unlikely(i == EXIT_LOOP_COUNT))
240 panic("AMD IOMMU: Completion wait loop failed\n");
241 335
242out: 336out:
243 spin_unlock_irqrestore(&iommu->lock, flags); 337 spin_unlock_irqrestore(&iommu->lock, flags);
@@ -264,6 +358,21 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
264 return ret; 358 return ret;
265} 359}
266 360
361static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
362 u16 domid, int pde, int s)
363{
364 memset(cmd, 0, sizeof(*cmd));
365 address &= PAGE_MASK;
366 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
367 cmd->data[1] |= domid;
368 cmd->data[2] = lower_32_bits(address);
369 cmd->data[3] = upper_32_bits(address);
370 if (s) /* size bit - we flush more than one 4kb page */
371 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
372 if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
373 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
374}
375
267/* 376/*
268 * Generic command send function for invalidaing TLB entries 377 * Generic command send function for invalidaing TLB entries
269 */ 378 */
@@ -273,16 +382,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
273 struct iommu_cmd cmd; 382 struct iommu_cmd cmd;
274 int ret; 383 int ret;
275 384
276 memset(&cmd, 0, sizeof(cmd)); 385 __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
277 address &= PAGE_MASK;
278 CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
279 cmd.data[1] |= domid;
280 cmd.data[2] = lower_32_bits(address);
281 cmd.data[3] = upper_32_bits(address);
282 if (s) /* size bit - we flush more than one 4kb page */
283 cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
284 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
285 cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
286 386
287 ret = iommu_queue_command(iommu, &cmd); 387 ret = iommu_queue_command(iommu, &cmd);
288 388
@@ -321,9 +421,35 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
321{ 421{
322 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; 422 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
323 423
424 INC_STATS_COUNTER(domain_flush_single);
425
324 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); 426 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
325} 427}
326 428
429/*
430 * This function is used to flush the IO/TLB for a given protection domain
431 * on every IOMMU in the system
432 */
433static void iommu_flush_domain(u16 domid)
434{
435 unsigned long flags;
436 struct amd_iommu *iommu;
437 struct iommu_cmd cmd;
438
439 INC_STATS_COUNTER(domain_flush_all);
440
441 __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
442 domid, 1, 1);
443
444 list_for_each_entry(iommu, &amd_iommu_list, list) {
445 spin_lock_irqsave(&iommu->lock, flags);
446 __iommu_queue_command(iommu, &cmd);
447 __iommu_completion_wait(iommu);
448 __iommu_wait_for_completion(iommu);
449 spin_unlock_irqrestore(&iommu->lock, flags);
450 }
451}
452
327/**************************************************************************** 453/****************************************************************************
328 * 454 *
329 * The functions below are used the create the page table mappings for 455 * The functions below are used the create the page table mappings for
@@ -338,10 +464,10 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
338 * supporting all features of AMD IOMMU page tables like level skipping 464 * supporting all features of AMD IOMMU page tables like level skipping
339 * and full 64 bit address spaces. 465 * and full 64 bit address spaces.
340 */ 466 */
341static int iommu_map(struct protection_domain *dom, 467static int iommu_map_page(struct protection_domain *dom,
342 unsigned long bus_addr, 468 unsigned long bus_addr,
343 unsigned long phys_addr, 469 unsigned long phys_addr,
344 int prot) 470 int prot)
345{ 471{
346 u64 __pte, *pte, *page; 472 u64 __pte, *pte, *page;
347 473
@@ -388,6 +514,28 @@ static int iommu_map(struct protection_domain *dom,
388 return 0; 514 return 0;
389} 515}
390 516
517static void iommu_unmap_page(struct protection_domain *dom,
518 unsigned long bus_addr)
519{
520 u64 *pte;
521
522 pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
523
524 if (!IOMMU_PTE_PRESENT(*pte))
525 return;
526
527 pte = IOMMU_PTE_PAGE(*pte);
528 pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
529
530 if (!IOMMU_PTE_PRESENT(*pte))
531 return;
532
533 pte = IOMMU_PTE_PAGE(*pte);
534 pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
535
536 *pte = 0;
537}
538
391/* 539/*
392 * This function checks if a specific unity mapping entry is needed for 540 * This function checks if a specific unity mapping entry is needed for
393 * this specific IOMMU. 541 * this specific IOMMU.
@@ -440,7 +588,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
440 588
441 for (addr = e->address_start; addr < e->address_end; 589 for (addr = e->address_start; addr < e->address_end;
442 addr += PAGE_SIZE) { 590 addr += PAGE_SIZE) {
443 ret = iommu_map(&dma_dom->domain, addr, addr, e->prot); 591 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
444 if (ret) 592 if (ret)
445 return ret; 593 return ret;
446 /* 594 /*
@@ -571,6 +719,16 @@ static u16 domain_id_alloc(void)
571 return id; 719 return id;
572} 720}
573 721
722static void domain_id_free(int id)
723{
724 unsigned long flags;
725
726 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
727 if (id > 0 && id < MAX_DOMAIN_ID)
728 __clear_bit(id, amd_iommu_pd_alloc_bitmap);
729 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
730}
731
574/* 732/*
575 * Used to reserve address ranges in the aperture (e.g. for exclusion 733 * Used to reserve address ranges in the aperture (e.g. for exclusion
576 * ranges. 734 * ranges.
@@ -587,12 +745,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
587 iommu_area_reserve(dom->bitmap, start_page, pages); 745 iommu_area_reserve(dom->bitmap, start_page, pages);
588} 746}
589 747
590static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) 748static void free_pagetable(struct protection_domain *domain)
591{ 749{
592 int i, j; 750 int i, j;
593 u64 *p1, *p2, *p3; 751 u64 *p1, *p2, *p3;
594 752
595 p1 = dma_dom->domain.pt_root; 753 p1 = domain->pt_root;
596 754
597 if (!p1) 755 if (!p1)
598 return; 756 return;
@@ -613,6 +771,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
613 } 771 }
614 772
615 free_page((unsigned long)p1); 773 free_page((unsigned long)p1);
774
775 domain->pt_root = NULL;
616} 776}
617 777
618/* 778/*
@@ -624,7 +784,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
624 if (!dom) 784 if (!dom)
625 return; 785 return;
626 786
627 dma_ops_free_pagetable(dom); 787 free_pagetable(&dom->domain);
628 788
629 kfree(dom->pte_pages); 789 kfree(dom->pte_pages);
630 790
@@ -663,6 +823,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
663 goto free_dma_dom; 823 goto free_dma_dom;
664 dma_dom->domain.mode = PAGE_MODE_3_LEVEL; 824 dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
665 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); 825 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
826 dma_dom->domain.flags = PD_DMA_OPS_MASK;
666 dma_dom->domain.priv = dma_dom; 827 dma_dom->domain.priv = dma_dom;
667 if (!dma_dom->domain.pt_root) 828 if (!dma_dom->domain.pt_root)
668 goto free_dma_dom; 829 goto free_dma_dom;
@@ -725,6 +886,15 @@ free_dma_dom:
725} 886}
726 887
727/* 888/*
889 * little helper function to check whether a given protection domain is a
890 * dma_ops domain
891 */
892static bool dma_ops_domain(struct protection_domain *domain)
893{
894 return domain->flags & PD_DMA_OPS_MASK;
895}
896
897/*
728 * Find out the protection domain structure for a given PCI device. This 898 * Find out the protection domain structure for a given PCI device. This
729 * will give us the pointer to the page table root for example. 899 * will give us the pointer to the page table root for example.
730 */ 900 */
@@ -744,14 +914,15 @@ static struct protection_domain *domain_for_device(u16 devid)
744 * If a device is not yet associated with a domain, this function does 914 * If a device is not yet associated with a domain, this function does
745 * assigns it visible for the hardware 915 * assigns it visible for the hardware
746 */ 916 */
747static void set_device_domain(struct amd_iommu *iommu, 917static void attach_device(struct amd_iommu *iommu,
748 struct protection_domain *domain, 918 struct protection_domain *domain,
749 u16 devid) 919 u16 devid)
750{ 920{
751 unsigned long flags; 921 unsigned long flags;
752
753 u64 pte_root = virt_to_phys(domain->pt_root); 922 u64 pte_root = virt_to_phys(domain->pt_root);
754 923
924 domain->dev_cnt += 1;
925
755 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) 926 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
756 << DEV_ENTRY_MODE_SHIFT; 927 << DEV_ENTRY_MODE_SHIFT;
757 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; 928 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
@@ -767,6 +938,116 @@ static void set_device_domain(struct amd_iommu *iommu,
767 iommu_queue_inv_dev_entry(iommu, devid); 938 iommu_queue_inv_dev_entry(iommu, devid);
768} 939}
769 940
941/*
942 * Removes a device from a protection domain (unlocked)
943 */
944static void __detach_device(struct protection_domain *domain, u16 devid)
945{
946
947 /* lock domain */
948 spin_lock(&domain->lock);
949
950 /* remove domain from the lookup table */
951 amd_iommu_pd_table[devid] = NULL;
952
953 /* remove entry from the device table seen by the hardware */
954 amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
955 amd_iommu_dev_table[devid].data[1] = 0;
956 amd_iommu_dev_table[devid].data[2] = 0;
957
958 /* decrease reference counter */
959 domain->dev_cnt -= 1;
960
961 /* ready */
962 spin_unlock(&domain->lock);
963}
964
965/*
966 * Removes a device from a protection domain (with devtable_lock held)
967 */
968static void detach_device(struct protection_domain *domain, u16 devid)
969{
970 unsigned long flags;
971
972 /* lock device table */
973 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
974 __detach_device(domain, devid);
975 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
976}
977
978static int device_change_notifier(struct notifier_block *nb,
979 unsigned long action, void *data)
980{
981 struct device *dev = data;
982 struct pci_dev *pdev = to_pci_dev(dev);
983 u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
984 struct protection_domain *domain;
985 struct dma_ops_domain *dma_domain;
986 struct amd_iommu *iommu;
987 int order = amd_iommu_aperture_order;
988 unsigned long flags;
989
990 if (devid > amd_iommu_last_bdf)
991 goto out;
992
993 devid = amd_iommu_alias_table[devid];
994
995 iommu = amd_iommu_rlookup_table[devid];
996 if (iommu == NULL)
997 goto out;
998
999 domain = domain_for_device(devid);
1000
1001 if (domain && !dma_ops_domain(domain))
1002 WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
1003 "to a non-dma-ops domain\n", dev_name(dev));
1004
1005 switch (action) {
1006 case BUS_NOTIFY_BOUND_DRIVER:
1007 if (domain)
1008 goto out;
1009 dma_domain = find_protection_domain(devid);
1010 if (!dma_domain)
1011 dma_domain = iommu->default_dom;
1012 attach_device(iommu, &dma_domain->domain, devid);
1013 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
1014 "device %s\n", dma_domain->domain.id, dev_name(dev));
1015 break;
1016 case BUS_NOTIFY_UNBIND_DRIVER:
1017 if (!domain)
1018 goto out;
1019 detach_device(domain, devid);
1020 break;
1021 case BUS_NOTIFY_ADD_DEVICE:
1022 /* allocate a protection domain if a device is added */
1023 dma_domain = find_protection_domain(devid);
1024 if (dma_domain)
1025 goto out;
1026 dma_domain = dma_ops_domain_alloc(iommu, order);
1027 if (!dma_domain)
1028 goto out;
1029 dma_domain->target_dev = devid;
1030
1031 spin_lock_irqsave(&iommu_pd_list_lock, flags);
1032 list_add_tail(&dma_domain->list, &iommu_pd_list);
1033 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1034
1035 break;
1036 default:
1037 goto out;
1038 }
1039
1040 iommu_queue_inv_dev_entry(iommu, devid);
1041 iommu_completion_wait(iommu);
1042
1043out:
1044 return 0;
1045}
1046
1047struct notifier_block device_nb = {
1048 .notifier_call = device_change_notifier,
1049};
1050
770/***************************************************************************** 1051/*****************************************************************************
771 * 1052 *
772 * The next functions belong to the dma_ops mapping/unmapping code. 1053 * The next functions belong to the dma_ops mapping/unmapping code.
@@ -802,7 +1083,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid)
802 list_for_each_entry(entry, &iommu_pd_list, list) { 1083 list_for_each_entry(entry, &iommu_pd_list, list) {
803 if (entry->target_dev == devid) { 1084 if (entry->target_dev == devid) {
804 ret = entry; 1085 ret = entry;
805 list_del(&ret->list);
806 break; 1086 break;
807 } 1087 }
808 } 1088 }
@@ -853,14 +1133,13 @@ static int get_device_resources(struct device *dev,
853 if (!dma_dom) 1133 if (!dma_dom)
854 dma_dom = (*iommu)->default_dom; 1134 dma_dom = (*iommu)->default_dom;
855 *domain = &dma_dom->domain; 1135 *domain = &dma_dom->domain;
856 set_device_domain(*iommu, *domain, *bdf); 1136 attach_device(*iommu, *domain, *bdf);
857 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 1137 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
858 "device ", (*domain)->id); 1138 "device %s\n", (*domain)->id, dev_name(dev));
859 print_devid(_bdf, 1);
860 } 1139 }
861 1140
862 if (domain_for_device(_bdf) == NULL) 1141 if (domain_for_device(_bdf) == NULL)
863 set_device_domain(*iommu, *domain, _bdf); 1142 attach_device(*iommu, *domain, _bdf);
864 1143
865 return 1; 1144 return 1;
866} 1145}
@@ -946,6 +1225,11 @@ static dma_addr_t __map_single(struct device *dev,
946 pages = iommu_num_pages(paddr, size, PAGE_SIZE); 1225 pages = iommu_num_pages(paddr, size, PAGE_SIZE);
947 paddr &= PAGE_MASK; 1226 paddr &= PAGE_MASK;
948 1227
1228 INC_STATS_COUNTER(total_map_requests);
1229
1230 if (pages > 1)
1231 INC_STATS_COUNTER(cross_page);
1232
949 if (align) 1233 if (align)
950 align_mask = (1UL << get_order(size)) - 1; 1234 align_mask = (1UL << get_order(size)) - 1;
951 1235
@@ -962,6 +1246,8 @@ static dma_addr_t __map_single(struct device *dev,
962 } 1246 }
963 address += offset; 1247 address += offset;
964 1248
1249 ADD_STATS_COUNTER(alloced_io_mem, size);
1250
965 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { 1251 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
966 iommu_flush_tlb(iommu, dma_dom->domain.id); 1252 iommu_flush_tlb(iommu, dma_dom->domain.id);
967 dma_dom->need_flush = false; 1253 dma_dom->need_flush = false;
@@ -998,6 +1284,8 @@ static void __unmap_single(struct amd_iommu *iommu,
998 start += PAGE_SIZE; 1284 start += PAGE_SIZE;
999 } 1285 }
1000 1286
1287 SUB_STATS_COUNTER(alloced_io_mem, size);
1288
1001 dma_ops_free_addresses(dma_dom, dma_addr, pages); 1289 dma_ops_free_addresses(dma_dom, dma_addr, pages);
1002 1290
1003 if (amd_iommu_unmap_flush || dma_dom->need_flush) { 1291 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
@@ -1019,6 +1307,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
1019 dma_addr_t addr; 1307 dma_addr_t addr;
1020 u64 dma_mask; 1308 u64 dma_mask;
1021 1309
1310 INC_STATS_COUNTER(cnt_map_single);
1311
1022 if (!check_device(dev)) 1312 if (!check_device(dev))
1023 return bad_dma_address; 1313 return bad_dma_address;
1024 1314
@@ -1030,6 +1320,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
1030 /* device not handled by any AMD IOMMU */ 1320 /* device not handled by any AMD IOMMU */
1031 return (dma_addr_t)paddr; 1321 return (dma_addr_t)paddr;
1032 1322
1323 if (!dma_ops_domain(domain))
1324 return bad_dma_address;
1325
1033 spin_lock_irqsave(&domain->lock, flags); 1326 spin_lock_irqsave(&domain->lock, flags);
1034 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, 1327 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
1035 dma_mask); 1328 dma_mask);
@@ -1055,11 +1348,16 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
1055 struct protection_domain *domain; 1348 struct protection_domain *domain;
1056 u16 devid; 1349 u16 devid;
1057 1350
1351 INC_STATS_COUNTER(cnt_unmap_single);
1352
1058 if (!check_device(dev) || 1353 if (!check_device(dev) ||
1059 !get_device_resources(dev, &iommu, &domain, &devid)) 1354 !get_device_resources(dev, &iommu, &domain, &devid))
1060 /* device not handled by any AMD IOMMU */ 1355 /* device not handled by any AMD IOMMU */
1061 return; 1356 return;
1062 1357
1358 if (!dma_ops_domain(domain))
1359 return;
1360
1063 spin_lock_irqsave(&domain->lock, flags); 1361 spin_lock_irqsave(&domain->lock, flags);
1064 1362
1065 __unmap_single(iommu, domain->priv, dma_addr, size, dir); 1363 __unmap_single(iommu, domain->priv, dma_addr, size, dir);
@@ -1104,6 +1402,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1104 int mapped_elems = 0; 1402 int mapped_elems = 0;
1105 u64 dma_mask; 1403 u64 dma_mask;
1106 1404
1405 INC_STATS_COUNTER(cnt_map_sg);
1406
1107 if (!check_device(dev)) 1407 if (!check_device(dev))
1108 return 0; 1408 return 0;
1109 1409
@@ -1114,6 +1414,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1114 if (!iommu || !domain) 1414 if (!iommu || !domain)
1115 return map_sg_no_iommu(dev, sglist, nelems, dir); 1415 return map_sg_no_iommu(dev, sglist, nelems, dir);
1116 1416
1417 if (!dma_ops_domain(domain))
1418 return 0;
1419
1117 spin_lock_irqsave(&domain->lock, flags); 1420 spin_lock_irqsave(&domain->lock, flags);
1118 1421
1119 for_each_sg(sglist, s, nelems, i) { 1422 for_each_sg(sglist, s, nelems, i) {
@@ -1163,10 +1466,15 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
1163 u16 devid; 1466 u16 devid;
1164 int i; 1467 int i;
1165 1468
1469 INC_STATS_COUNTER(cnt_unmap_sg);
1470
1166 if (!check_device(dev) || 1471 if (!check_device(dev) ||
1167 !get_device_resources(dev, &iommu, &domain, &devid)) 1472 !get_device_resources(dev, &iommu, &domain, &devid))
1168 return; 1473 return;
1169 1474
1475 if (!dma_ops_domain(domain))
1476 return;
1477
1170 spin_lock_irqsave(&domain->lock, flags); 1478 spin_lock_irqsave(&domain->lock, flags);
1171 1479
1172 for_each_sg(sglist, s, nelems, i) { 1480 for_each_sg(sglist, s, nelems, i) {
@@ -1194,6 +1502,8 @@ static void *alloc_coherent(struct device *dev, size_t size,
1194 phys_addr_t paddr; 1502 phys_addr_t paddr;
1195 u64 dma_mask = dev->coherent_dma_mask; 1503 u64 dma_mask = dev->coherent_dma_mask;
1196 1504
1505 INC_STATS_COUNTER(cnt_alloc_coherent);
1506
1197 if (!check_device(dev)) 1507 if (!check_device(dev))
1198 return NULL; 1508 return NULL;
1199 1509
@@ -1212,6 +1522,9 @@ static void *alloc_coherent(struct device *dev, size_t size,
1212 return virt_addr; 1522 return virt_addr;
1213 } 1523 }
1214 1524
1525 if (!dma_ops_domain(domain))
1526 goto out_free;
1527
1215 if (!dma_mask) 1528 if (!dma_mask)
1216 dma_mask = *dev->dma_mask; 1529 dma_mask = *dev->dma_mask;
1217 1530
@@ -1220,18 +1533,20 @@ static void *alloc_coherent(struct device *dev, size_t size,
1220 *dma_addr = __map_single(dev, iommu, domain->priv, paddr, 1533 *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
1221 size, DMA_BIDIRECTIONAL, true, dma_mask); 1534 size, DMA_BIDIRECTIONAL, true, dma_mask);
1222 1535
1223 if (*dma_addr == bad_dma_address) { 1536 if (*dma_addr == bad_dma_address)
1224 free_pages((unsigned long)virt_addr, get_order(size)); 1537 goto out_free;
1225 virt_addr = NULL;
1226 goto out;
1227 }
1228 1538
1229 iommu_completion_wait(iommu); 1539 iommu_completion_wait(iommu);
1230 1540
1231out:
1232 spin_unlock_irqrestore(&domain->lock, flags); 1541 spin_unlock_irqrestore(&domain->lock, flags);
1233 1542
1234 return virt_addr; 1543 return virt_addr;
1544
1545out_free:
1546
1547 free_pages((unsigned long)virt_addr, get_order(size));
1548
1549 return NULL;
1235} 1550}
1236 1551
1237/* 1552/*
@@ -1245,6 +1560,8 @@ static void free_coherent(struct device *dev, size_t size,
1245 struct protection_domain *domain; 1560 struct protection_domain *domain;
1246 u16 devid; 1561 u16 devid;
1247 1562
1563 INC_STATS_COUNTER(cnt_free_coherent);
1564
1248 if (!check_device(dev)) 1565 if (!check_device(dev))
1249 return; 1566 return;
1250 1567
@@ -1253,6 +1570,9 @@ static void free_coherent(struct device *dev, size_t size,
1253 if (!iommu || !domain) 1570 if (!iommu || !domain)
1254 goto free_mem; 1571 goto free_mem;
1255 1572
1573 if (!dma_ops_domain(domain))
1574 goto free_mem;
1575
1256 spin_lock_irqsave(&domain->lock, flags); 1576 spin_lock_irqsave(&domain->lock, flags);
1257 1577
1258 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); 1578 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
@@ -1305,7 +1625,7 @@ static void prealloc_protection_domains(void)
1305 u16 devid; 1625 u16 devid;
1306 1626
1307 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 1627 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1308 devid = (dev->bus->number << 8) | dev->devfn; 1628 devid = calc_devid(dev->bus->number, dev->devfn);
1309 if (devid > amd_iommu_last_bdf) 1629 if (devid > amd_iommu_last_bdf)
1310 continue; 1630 continue;
1311 devid = amd_iommu_alias_table[devid]; 1631 devid = amd_iommu_alias_table[devid];
@@ -1352,6 +1672,7 @@ int __init amd_iommu_init_dma_ops(void)
1352 iommu->default_dom = dma_ops_domain_alloc(iommu, order); 1672 iommu->default_dom = dma_ops_domain_alloc(iommu, order);
1353 if (iommu->default_dom == NULL) 1673 if (iommu->default_dom == NULL)
1354 return -ENOMEM; 1674 return -ENOMEM;
1675 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
1355 ret = iommu_init_unity_mappings(iommu); 1676 ret = iommu_init_unity_mappings(iommu);
1356 if (ret) 1677 if (ret)
1357 goto free_domains; 1678 goto free_domains;
@@ -1375,6 +1696,12 @@ int __init amd_iommu_init_dma_ops(void)
1375 /* Make the driver finally visible to the drivers */ 1696 /* Make the driver finally visible to the drivers */
1376 dma_ops = &amd_iommu_dma_ops; 1697 dma_ops = &amd_iommu_dma_ops;
1377 1698
1699 register_iommu(&amd_iommu_ops);
1700
1701 bus_register_notifier(&pci_bus_type, &device_nb);
1702
1703 amd_iommu_stats_init();
1704
1378 return 0; 1705 return 0;
1379 1706
1380free_domains: 1707free_domains:
@@ -1386,3 +1713,224 @@ free_domains:
1386 1713
1387 return ret; 1714 return ret;
1388} 1715}
1716
1717/*****************************************************************************
1718 *
1719 * The following functions belong to the exported interface of AMD IOMMU
1720 *
1721 * This interface allows access to lower level functions of the IOMMU
1722 * like protection domain handling and assignment of devices to domains
1723 * which is not possible with the dma_ops interface.
1724 *
1725 *****************************************************************************/
1726
1727static void cleanup_domain(struct protection_domain *domain)
1728{
1729 unsigned long flags;
1730 u16 devid;
1731
1732 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1733
1734 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
1735 if (amd_iommu_pd_table[devid] == domain)
1736 __detach_device(domain, devid);
1737
1738 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1739}
1740
1741static int amd_iommu_domain_init(struct iommu_domain *dom)
1742{
1743 struct protection_domain *domain;
1744
1745 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
1746 if (!domain)
1747 return -ENOMEM;
1748
1749 spin_lock_init(&domain->lock);
1750 domain->mode = PAGE_MODE_3_LEVEL;
1751 domain->id = domain_id_alloc();
1752 if (!domain->id)
1753 goto out_free;
1754 domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
1755 if (!domain->pt_root)
1756 goto out_free;
1757
1758 dom->priv = domain;
1759
1760 return 0;
1761
1762out_free:
1763 kfree(domain);
1764
1765 return -ENOMEM;
1766}
1767
1768static void amd_iommu_domain_destroy(struct iommu_domain *dom)
1769{
1770 struct protection_domain *domain = dom->priv;
1771
1772 if (!domain)
1773 return;
1774
1775 if (domain->dev_cnt > 0)
1776 cleanup_domain(domain);
1777
1778 BUG_ON(domain->dev_cnt != 0);
1779
1780 free_pagetable(domain);
1781
1782 domain_id_free(domain->id);
1783
1784 kfree(domain);
1785
1786 dom->priv = NULL;
1787}
1788
1789static void amd_iommu_detach_device(struct iommu_domain *dom,
1790 struct device *dev)
1791{
1792 struct protection_domain *domain = dom->priv;
1793 struct amd_iommu *iommu;
1794 struct pci_dev *pdev;
1795 u16 devid;
1796
1797 if (dev->bus != &pci_bus_type)
1798 return;
1799
1800 pdev = to_pci_dev(dev);
1801
1802 devid = calc_devid(pdev->bus->number, pdev->devfn);
1803
1804 if (devid > 0)
1805 detach_device(domain, devid);
1806
1807 iommu = amd_iommu_rlookup_table[devid];
1808 if (!iommu)
1809 return;
1810
1811 iommu_queue_inv_dev_entry(iommu, devid);
1812 iommu_completion_wait(iommu);
1813}
1814
1815static int amd_iommu_attach_device(struct iommu_domain *dom,
1816 struct device *dev)
1817{
1818 struct protection_domain *domain = dom->priv;
1819 struct protection_domain *old_domain;
1820 struct amd_iommu *iommu;
1821 struct pci_dev *pdev;
1822 u16 devid;
1823
1824 if (dev->bus != &pci_bus_type)
1825 return -EINVAL;
1826
1827 pdev = to_pci_dev(dev);
1828
1829 devid = calc_devid(pdev->bus->number, pdev->devfn);
1830
1831 if (devid >= amd_iommu_last_bdf ||
1832 devid != amd_iommu_alias_table[devid])
1833 return -EINVAL;
1834
1835 iommu = amd_iommu_rlookup_table[devid];
1836 if (!iommu)
1837 return -EINVAL;
1838
1839 old_domain = domain_for_device(devid);
1840 if (old_domain)
1841 return -EBUSY;
1842
1843 attach_device(iommu, domain, devid);
1844
1845 iommu_completion_wait(iommu);
1846
1847 return 0;
1848}
1849
1850static int amd_iommu_map_range(struct iommu_domain *dom,
1851 unsigned long iova, phys_addr_t paddr,
1852 size_t size, int iommu_prot)
1853{
1854 struct protection_domain *domain = dom->priv;
1855 unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);
1856 int prot = 0;
1857 int ret;
1858
1859 if (iommu_prot & IOMMU_READ)
1860 prot |= IOMMU_PROT_IR;
1861 if (iommu_prot & IOMMU_WRITE)
1862 prot |= IOMMU_PROT_IW;
1863
1864 iova &= PAGE_MASK;
1865 paddr &= PAGE_MASK;
1866
1867 for (i = 0; i < npages; ++i) {
1868 ret = iommu_map_page(domain, iova, paddr, prot);
1869 if (ret)
1870 return ret;
1871
1872 iova += PAGE_SIZE;
1873 paddr += PAGE_SIZE;
1874 }
1875
1876 return 0;
1877}
1878
1879static void amd_iommu_unmap_range(struct iommu_domain *dom,
1880 unsigned long iova, size_t size)
1881{
1882
1883 struct protection_domain *domain = dom->priv;
1884 unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE);
1885
1886 iova &= PAGE_MASK;
1887
1888 for (i = 0; i < npages; ++i) {
1889 iommu_unmap_page(domain, iova);
1890 iova += PAGE_SIZE;
1891 }
1892
1893 iommu_flush_domain(domain->id);
1894}
1895
1896static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
1897 unsigned long iova)
1898{
1899 struct protection_domain *domain = dom->priv;
1900 unsigned long offset = iova & ~PAGE_MASK;
1901 phys_addr_t paddr;
1902 u64 *pte;
1903
1904 pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
1905
1906 if (!IOMMU_PTE_PRESENT(*pte))
1907 return 0;
1908
1909 pte = IOMMU_PTE_PAGE(*pte);
1910 pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
1911
1912 if (!IOMMU_PTE_PRESENT(*pte))
1913 return 0;
1914
1915 pte = IOMMU_PTE_PAGE(*pte);
1916 pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
1917
1918 if (!IOMMU_PTE_PRESENT(*pte))
1919 return 0;
1920
1921 paddr = *pte & IOMMU_PAGE_MASK;
1922 paddr |= offset;
1923
1924 return paddr;
1925}
1926
1927static struct iommu_ops amd_iommu_ops = {
1928 .domain_init = amd_iommu_domain_init,
1929 .domain_destroy = amd_iommu_domain_destroy,
1930 .attach_dev = amd_iommu_attach_device,
1931 .detach_dev = amd_iommu_detach_device,
1932 .map = amd_iommu_map_range,
1933 .unmap = amd_iommu_unmap_range,
1934 .iova_to_phys = amd_iommu_iova_to_phys,
1935};
1936
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index fb85e8d466cc..42c33cebf00f 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -122,7 +122,8 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
122LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings 122LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
123 we find in ACPI */ 123 we find in ACPI */
124unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ 124unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
125int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */ 125bool amd_iommu_isolate = true; /* if true, device isolation is
126 enabled */
126bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ 127bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
127 128
128LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the 129LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
@@ -245,12 +246,8 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
245/* Function to enable the hardware */ 246/* Function to enable the hardware */
246static void __init iommu_enable(struct amd_iommu *iommu) 247static void __init iommu_enable(struct amd_iommu *iommu)
247{ 248{
248 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " 249 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
249 "at %02x:%02x.%x cap 0x%hx\n", 250 dev_name(&iommu->dev->dev), iommu->cap_ptr);
250 iommu->dev->bus->number,
251 PCI_SLOT(iommu->dev->devfn),
252 PCI_FUNC(iommu->dev->devfn),
253 iommu->cap_ptr);
254 251
255 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 252 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
256} 253}
@@ -1218,9 +1215,9 @@ static int __init parse_amd_iommu_options(char *str)
1218{ 1215{
1219 for (; *str; ++str) { 1216 for (; *str; ++str) {
1220 if (strncmp(str, "isolate", 7) == 0) 1217 if (strncmp(str, "isolate", 7) == 0)
1221 amd_iommu_isolate = 1; 1218 amd_iommu_isolate = true;
1222 if (strncmp(str, "share", 5) == 0) 1219 if (strncmp(str, "share", 5) == 0)
1223 amd_iommu_isolate = 0; 1220 amd_iommu_isolate = false;
1224 if (strncmp(str, "fullflush", 9) == 0) 1221 if (strncmp(str, "fullflush", 9) == 0)
1225 amd_iommu_unmap_flush = true; 1222 amd_iommu_unmap_flush = true;
1226 } 1223 }
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index c02343594b4d..d3ec292f00f2 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,8 +7,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
7ifeq ($(CONFIG_KVM_TRACE),y) 7ifeq ($(CONFIG_KVM_TRACE),y)
8common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) 8common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
9endif 9endif
10ifeq ($(CONFIG_DMAR),y) 10ifeq ($(CONFIG_IOMMU_API),y)
11common-objs += $(addprefix ../../../virt/kvm/, vtd.o) 11common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
12endif 12endif
13 13
14EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm 14EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0e6aa8141dcd..cc17546a2406 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -34,6 +34,7 @@
34#include <linux/module.h> 34#include <linux/module.h>
35#include <linux/mman.h> 35#include <linux/mman.h>
36#include <linux/highmem.h> 36#include <linux/highmem.h>
37#include <linux/iommu.h>
37#include <linux/intel-iommu.h> 38#include <linux/intel-iommu.h>
38 39
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
@@ -989,7 +990,7 @@ int kvm_dev_ioctl_check_extension(long ext)
989 r = !tdp_enabled; 990 r = !tdp_enabled;
990 break; 991 break;
991 case KVM_CAP_IOMMU: 992 case KVM_CAP_IOMMU:
992 r = intel_iommu_found(); 993 r = iommu_found();
993 break; 994 break;
994 default: 995 default:
995 r = 0; 996 r = 0;
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index c66637392bbc..b5b8ba512b28 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_FW_LOADER) += firmware_class.o
11obj-$(CONFIG_NUMA) += node.o 11obj-$(CONFIG_NUMA) += node.o
12obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o 12obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
13obj-$(CONFIG_SMP) += topology.o 13obj-$(CONFIG_SMP) += topology.o
14obj-$(CONFIG_IOMMU_API) += iommu.o
14ifeq ($(CONFIG_SYSFS),y) 15ifeq ($(CONFIG_SYSFS),y)
15obj-$(CONFIG_MODULES) += module.o 16obj-$(CONFIG_MODULES) += module.o
16endif 17endif
diff --git a/drivers/base/iommu.c b/drivers/base/iommu.c
new file mode 100644
index 000000000000..5e039d4f877c
--- /dev/null
+++ b/drivers/base/iommu.c
@@ -0,0 +1,100 @@
1/*
2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published
7 * by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include <linux/bug.h>
20#include <linux/types.h>
21#include <linux/errno.h>
22#include <linux/iommu.h>
23
24static struct iommu_ops *iommu_ops;
25
26void register_iommu(struct iommu_ops *ops)
27{
28 if (iommu_ops)
29 BUG();
30
31 iommu_ops = ops;
32}
33
34bool iommu_found()
35{
36 return iommu_ops != NULL;
37}
38EXPORT_SYMBOL_GPL(iommu_found);
39
40struct iommu_domain *iommu_domain_alloc(void)
41{
42 struct iommu_domain *domain;
43 int ret;
44
45 domain = kmalloc(sizeof(*domain), GFP_KERNEL);
46 if (!domain)
47 return NULL;
48
49 ret = iommu_ops->domain_init(domain);
50 if (ret)
51 goto out_free;
52
53 return domain;
54
55out_free:
56 kfree(domain);
57
58 return NULL;
59}
60EXPORT_SYMBOL_GPL(iommu_domain_alloc);
61
62void iommu_domain_free(struct iommu_domain *domain)
63{
64 iommu_ops->domain_destroy(domain);
65 kfree(domain);
66}
67EXPORT_SYMBOL_GPL(iommu_domain_free);
68
69int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
70{
71 return iommu_ops->attach_dev(domain, dev);
72}
73EXPORT_SYMBOL_GPL(iommu_attach_device);
74
75void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
76{
77 iommu_ops->detach_dev(domain, dev);
78}
79EXPORT_SYMBOL_GPL(iommu_detach_device);
80
81int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
82 phys_addr_t paddr, size_t size, int prot)
83{
84 return iommu_ops->map(domain, iova, paddr, size, prot);
85}
86EXPORT_SYMBOL_GPL(iommu_map_range);
87
88void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
89 size_t size)
90{
91 iommu_ops->unmap(domain, iova, size);
92}
93EXPORT_SYMBOL_GPL(iommu_unmap_range);
94
95phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
96 unsigned long iova)
97{
98 return iommu_ops->iova_to_phys(domain, iova);
99}
100EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
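
The new drivers/base/iommu.c layer simply forwards each call to whichever iommu_ops implementation called register_iommu() at boot (amd_iommu_ops in this patch), which is what lets virt/kvm/iommu.c replace the VT-d-only calls. The following is a hedged sketch of a client of this API, not code from the patch; the function name, device pointer and addresses are placeholders.

/* Sketch only: a hypothetical caller of the generic IOMMU API. */
#include <linux/iommu.h>
#include <linux/device.h>
#include <linux/errno.h>

static int example_assign_device(struct device *dev, unsigned long iova,
				 phys_addr_t paddr, size_t size)
{
	struct iommu_domain *domain;
	int ret;

	if (!iommu_found())		/* no IOMMU driver registered */
		return -ENODEV;

	domain = iommu_domain_alloc();	/* dispatches to ops->domain_init() */
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, dev);
	if (ret)
		goto out_free;

	/* Protection flags are the IOMMU_READ/IOMMU_WRITE bits checked above. */
	ret = iommu_map_range(domain, iova, paddr, size,
			      IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	return 0;	/* iommu_iova_to_phys(domain, iova) now resolves */

out_detach:
	iommu_detach_device(domain, dev);
out_free:
	iommu_domain_free(domain);
	return ret;
}

Teardown mirrors the setup: iommu_unmap_range(), then iommu_detach_device() and iommu_domain_free().
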
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 691b3adeb870..f5a662a50acb 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -191,26 +191,17 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
191static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru) 191static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
192{ 192{
193 struct acpi_dmar_hardware_unit *drhd; 193 struct acpi_dmar_hardware_unit *drhd;
194 static int include_all;
195 int ret = 0; 194 int ret = 0;
196 195
197 drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr; 196 drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
198 197
199 if (!dmaru->include_all) 198 if (dmaru->include_all)
200 ret = dmar_parse_dev_scope((void *)(drhd + 1), 199 return 0;
200
201 ret = dmar_parse_dev_scope((void *)(drhd + 1),
201 ((void *)drhd) + drhd->header.length, 202 ((void *)drhd) + drhd->header.length,
202 &dmaru->devices_cnt, &dmaru->devices, 203 &dmaru->devices_cnt, &dmaru->devices,
203 drhd->segment); 204 drhd->segment);
204 else {
205 /* Only allow one INCLUDE_ALL */
206 if (include_all) {
207 printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL "
208 "device scope is allowed\n");
209 ret = -EINVAL;
210 }
211 include_all = 1;
212 }
213
214 if (ret) { 205 if (ret) {
215 list_del(&dmaru->list); 206 list_del(&dmaru->list);
216 kfree(dmaru); 207 kfree(dmaru);
@@ -384,12 +375,21 @@ int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
384struct dmar_drhd_unit * 375struct dmar_drhd_unit *
385dmar_find_matched_drhd_unit(struct pci_dev *dev) 376dmar_find_matched_drhd_unit(struct pci_dev *dev)
386{ 377{
387 struct dmar_drhd_unit *drhd = NULL; 378 struct dmar_drhd_unit *dmaru = NULL;
379 struct acpi_dmar_hardware_unit *drhd;
388 380
389 list_for_each_entry(drhd, &dmar_drhd_units, list) { 381 list_for_each_entry(dmaru, &dmar_drhd_units, list) {
390 if (drhd->include_all || dmar_pci_device_match(drhd->devices, 382 drhd = container_of(dmaru->hdr,
391 drhd->devices_cnt, dev)) 383 struct acpi_dmar_hardware_unit,
392 return drhd; 384 header);
385
386 if (dmaru->include_all &&
387 drhd->segment == pci_domain_nr(dev->bus))
388 return dmaru;
389
390 if (dmar_pci_device_match(dmaru->devices,
391 dmaru->devices_cnt, dev))
392 return dmaru;
393 } 393 }
394 394
395 return NULL; 395 return NULL;
@@ -491,6 +491,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
491 int map_size; 491 int map_size;
492 u32 ver; 492 u32 ver;
493 static int iommu_allocated = 0; 493 static int iommu_allocated = 0;
494 int agaw;
494 495
495 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); 496 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
496 if (!iommu) 497 if (!iommu)
@@ -506,6 +507,15 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
506 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); 507 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
507 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); 508 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
508 509
510 agaw = iommu_calculate_agaw(iommu);
511 if (agaw < 0) {
512 printk(KERN_ERR
513 "Cannot get a valid agaw for iommu (seq_id = %d)\n",
514 iommu->seq_id);
515 goto error;
516 }
517 iommu->agaw = agaw;
518
509 /* the registers might be more than one page */ 519 /* the registers might be more than one page */
510 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), 520 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
511 cap_max_fault_reg_offset(iommu->cap)); 521 cap_max_fault_reg_offset(iommu->cap));
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 5c8baa43ac9c..235fb7a5a8a5 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -27,7 +27,6 @@
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/irq.h> 28#include <linux/irq.h>
29#include <linux/interrupt.h> 29#include <linux/interrupt.h>
30#include <linux/sysdev.h>
31#include <linux/spinlock.h> 30#include <linux/spinlock.h>
32#include <linux/pci.h> 31#include <linux/pci.h>
33#include <linux/dmar.h> 32#include <linux/dmar.h>
@@ -35,6 +34,7 @@
35#include <linux/mempool.h> 34#include <linux/mempool.h>
36#include <linux/timer.h> 35#include <linux/timer.h>
37#include <linux/iova.h> 36#include <linux/iova.h>
37#include <linux/iommu.h>
38#include <linux/intel-iommu.h> 38#include <linux/intel-iommu.h>
39#include <asm/cacheflush.h> 39#include <asm/cacheflush.h>
40#include <asm/iommu.h> 40#include <asm/iommu.h>
@@ -54,6 +54,195 @@
54 54
55#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 55#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
56 56
57#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
58#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
59#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
60
61/* global iommu list, set NULL for ignored DMAR units */
62static struct intel_iommu **g_iommus;
63
64/*
65 * 0: Present
66 * 1-11: Reserved
67 * 12-63: Context Ptr (12 - (haw-1))
68 * 64-127: Reserved
69 */
70struct root_entry {
71 u64 val;
72 u64 rsvd1;
73};
74#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
75static inline bool root_present(struct root_entry *root)
76{
77 return (root->val & 1);
78}
79static inline void set_root_present(struct root_entry *root)
80{
81 root->val |= 1;
82}
83static inline void set_root_value(struct root_entry *root, unsigned long value)
84{
85 root->val |= value & VTD_PAGE_MASK;
86}
87
88static inline struct context_entry *
89get_context_addr_from_root(struct root_entry *root)
90{
91 return (struct context_entry *)
92 (root_present(root)?phys_to_virt(
93 root->val & VTD_PAGE_MASK) :
94 NULL);
95}
96
97/*
98 * low 64 bits:
99 * 0: present
100 * 1: fault processing disable
101 * 2-3: translation type
102 * 12-63: address space root
103 * high 64 bits:
104 * 0-2: address width
 105 * 3-6: avail
106 * 8-23: domain id
107 */
108struct context_entry {
109 u64 lo;
110 u64 hi;
111};
112
113static inline bool context_present(struct context_entry *context)
114{
115 return (context->lo & 1);
116}
117static inline void context_set_present(struct context_entry *context)
118{
119 context->lo |= 1;
120}
121
122static inline void context_set_fault_enable(struct context_entry *context)
123{
124 context->lo &= (((u64)-1) << 2) | 1;
125}
126
127#define CONTEXT_TT_MULTI_LEVEL 0
128
129static inline void context_set_translation_type(struct context_entry *context,
130 unsigned long value)
131{
132 context->lo &= (((u64)-1) << 4) | 3;
133 context->lo |= (value & 3) << 2;
134}
135
136static inline void context_set_address_root(struct context_entry *context,
137 unsigned long value)
138{
139 context->lo |= value & VTD_PAGE_MASK;
140}
141
142static inline void context_set_address_width(struct context_entry *context,
143 unsigned long value)
144{
145 context->hi |= value & 7;
146}
147
148static inline void context_set_domain_id(struct context_entry *context,
149 unsigned long value)
150{
151 context->hi |= (value & ((1 << 16) - 1)) << 8;
152}
153
154static inline void context_clear_entry(struct context_entry *context)
155{
156 context->lo = 0;
157 context->hi = 0;
158}
159
160/*
161 * 0: readable
162 * 1: writable
163 * 2-6: reserved
164 * 7: super page
165 * 8-11: available
 166 * 12-63: Host physical address
167 */
168struct dma_pte {
169 u64 val;
170};
171
172static inline void dma_clear_pte(struct dma_pte *pte)
173{
174 pte->val = 0;
175}
176
177static inline void dma_set_pte_readable(struct dma_pte *pte)
178{
179 pte->val |= DMA_PTE_READ;
180}
181
182static inline void dma_set_pte_writable(struct dma_pte *pte)
183{
184 pte->val |= DMA_PTE_WRITE;
185}
186
187static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
188{
189 pte->val = (pte->val & ~3) | (prot & 3);
190}
191
192static inline u64 dma_pte_addr(struct dma_pte *pte)
193{
194 return (pte->val & VTD_PAGE_MASK);
195}
196
197static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
198{
199 pte->val |= (addr & VTD_PAGE_MASK);
200}
201
202static inline bool dma_pte_present(struct dma_pte *pte)
203{
204 return (pte->val & 3) != 0;
205}
206
207/* devices under the same p2p bridge are owned in one domain */
208#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
209
 210/* domain represents a virtual machine; more than one device
211 * across iommus may be owned in one domain, e.g. kvm guest.
212 */
213#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
214
215struct dmar_domain {
216 int id; /* domain id */
 217 unsigned long iommu_bmp; /* bitmap of iommus this domain uses */
218
219 struct list_head devices; /* all devices' list */
220 struct iova_domain iovad; /* iova's that belong to this domain */
221
222 struct dma_pte *pgd; /* virtual address */
223 spinlock_t mapping_lock; /* page table lock */
224 int gaw; /* max guest address width */
225
226 /* adjusted guest address width, 0 is level 2 30-bit */
227 int agaw;
228
229 int flags; /* flags to find out type of domain */
230
231 int iommu_coherency;/* indicate coherency of iommu access */
232 int iommu_count; /* reference count of iommu */
233 spinlock_t iommu_lock; /* protect iommu set in domain */
234 u64 max_addr; /* maximum mapped address */
235};
236
237/* PCI domain-device relationship */
238struct device_domain_info {
239 struct list_head link; /* link to domain siblings */
240 struct list_head global; /* link to global list */
 241 u8 bus; /* PCI bus number */
242 u8 devfn; /* PCI devfn number */
243 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
244 struct dmar_domain *domain; /* pointer to domain */
245};
57 246
58static void flush_unmaps_timeout(unsigned long data); 247static void flush_unmaps_timeout(unsigned long data);
59 248
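The reworked struct dmar_domain above drops the single iommu back-pointer in favour of iommu_bmp, so one domain (for example a KVM guest's) can now span devices behind several DMAR units. A minimal sketch of querying that bitmap, assuming only the structure shown above plus hweight_long() from <linux/bitops.h>; the helper name is illustrative, not part of the patch:

/* illustrative helper: iommu_bmp holds one bit per iommu seq_id */
static inline bool example_domain_spans_iommus(struct dmar_domain *domain)
{
	return hweight_long(domain->iommu_bmp) > 1;
}

The later hunks use this same bitmap for domain_get_iommu(), the per-domain coherency update, and the reference counting done under iommu_lock.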
@@ -88,6 +277,8 @@ static int intel_iommu_strict;
88static DEFINE_SPINLOCK(device_domain_lock); 277static DEFINE_SPINLOCK(device_domain_lock);
89static LIST_HEAD(device_domain_list); 278static LIST_HEAD(device_domain_list);
90 279
280static struct iommu_ops intel_iommu_ops;
281
91static int __init intel_iommu_setup(char *str) 282static int __init intel_iommu_setup(char *str)
92{ 283{
93 if (!str) 284 if (!str)
@@ -184,6 +375,87 @@ void free_iova_mem(struct iova *iova)
184 kmem_cache_free(iommu_iova_cache, iova); 375 kmem_cache_free(iommu_iova_cache, iova);
185} 376}
186 377
378
379static inline int width_to_agaw(int width);
380
381/* calculate agaw for each iommu.
382 * "SAGAW" may be different across iommus, use a default agaw, and
 383 * fall back to a smaller supported agaw for iommus that don't support the default.
384 */
385int iommu_calculate_agaw(struct intel_iommu *iommu)
386{
387 unsigned long sagaw;
388 int agaw = -1;
389
390 sagaw = cap_sagaw(iommu->cap);
391 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
392 agaw >= 0; agaw--) {
393 if (test_bit(agaw, &sagaw))
394 break;
395 }
396
397 return agaw;
398}
399
400/* in native case, each domain is related to only one iommu */
401static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
402{
403 int iommu_id;
404
405 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
406
407 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
408 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
409 return NULL;
410
411 return g_iommus[iommu_id];
412}
413
414/* "Coherency" capability may be different across iommus */
415static void domain_update_iommu_coherency(struct dmar_domain *domain)
416{
417 int i;
418
419 domain->iommu_coherency = 1;
420
421 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
422 for (; i < g_num_of_iommus; ) {
423 if (!ecap_coherent(g_iommus[i]->ecap)) {
424 domain->iommu_coherency = 0;
425 break;
426 }
427 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
428 }
429}
430
431static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
432{
433 struct dmar_drhd_unit *drhd = NULL;
434 int i;
435
436 for_each_drhd_unit(drhd) {
437 if (drhd->ignored)
438 continue;
439
440 for (i = 0; i < drhd->devices_cnt; i++)
441 if (drhd->devices[i]->bus->number == bus &&
442 drhd->devices[i]->devfn == devfn)
443 return drhd->iommu;
444
445 if (drhd->include_all)
446 return drhd->iommu;
447 }
448
449 return NULL;
450}
451
452static void domain_flush_cache(struct dmar_domain *domain,
453 void *addr, int size)
454{
455 if (!domain->iommu_coherency)
456 clflush_cache_range(addr, size);
457}
458
187/* Gets context entry for a given bus and devfn */ 459/* Gets context entry for a given bus and devfn */
188static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, 460static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
189 u8 bus, u8 devfn) 461 u8 bus, u8 devfn)
@@ -226,7 +498,7 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
226 ret = 0; 498 ret = 0;
227 goto out; 499 goto out;
228 } 500 }
229 ret = context_present(context[devfn]); 501 ret = context_present(&context[devfn]);
230out: 502out:
231 spin_unlock_irqrestore(&iommu->lock, flags); 503 spin_unlock_irqrestore(&iommu->lock, flags);
232 return ret; 504 return ret;
@@ -242,7 +514,7 @@ static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
242 root = &iommu->root_entry[bus]; 514 root = &iommu->root_entry[bus];
243 context = get_context_addr_from_root(root); 515 context = get_context_addr_from_root(root);
244 if (context) { 516 if (context) {
245 context_clear_entry(context[devfn]); 517 context_clear_entry(&context[devfn]);
246 __iommu_flush_cache(iommu, &context[devfn], \ 518 __iommu_flush_cache(iommu, &context[devfn], \
247 sizeof(*context)); 519 sizeof(*context));
248 } 520 }
@@ -339,7 +611,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
339 if (level == 1) 611 if (level == 1)
340 break; 612 break;
341 613
342 if (!dma_pte_present(*pte)) { 614 if (!dma_pte_present(pte)) {
343 tmp_page = alloc_pgtable_page(); 615 tmp_page = alloc_pgtable_page();
344 616
345 if (!tmp_page) { 617 if (!tmp_page) {
@@ -347,18 +619,17 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
347 flags); 619 flags);
348 return NULL; 620 return NULL;
349 } 621 }
350 __iommu_flush_cache(domain->iommu, tmp_page, 622 domain_flush_cache(domain, tmp_page, PAGE_SIZE);
351 PAGE_SIZE); 623 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
352 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
353 /* 624 /*
354 * high level table always sets r/w, last level page 625 * high level table always sets r/w, last level page
355 * table control read/write 626 * table control read/write
356 */ 627 */
357 dma_set_pte_readable(*pte); 628 dma_set_pte_readable(pte);
358 dma_set_pte_writable(*pte); 629 dma_set_pte_writable(pte);
359 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); 630 domain_flush_cache(domain, pte, sizeof(*pte));
360 } 631 }
361 parent = phys_to_virt(dma_pte_addr(*pte)); 632 parent = phys_to_virt(dma_pte_addr(pte));
362 level--; 633 level--;
363 } 634 }
364 635
@@ -381,9 +652,9 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
381 if (level == total) 652 if (level == total)
382 return pte; 653 return pte;
383 654
384 if (!dma_pte_present(*pte)) 655 if (!dma_pte_present(pte))
385 break; 656 break;
386 parent = phys_to_virt(dma_pte_addr(*pte)); 657 parent = phys_to_virt(dma_pte_addr(pte));
387 total--; 658 total--;
388 } 659 }
389 return NULL; 660 return NULL;
@@ -398,8 +669,8 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
398 pte = dma_addr_level_pte(domain, addr, 1); 669 pte = dma_addr_level_pte(domain, addr, 1);
399 670
400 if (pte) { 671 if (pte) {
401 dma_clear_pte(*pte); 672 dma_clear_pte(pte);
402 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); 673 domain_flush_cache(domain, pte, sizeof(*pte));
403 } 674 }
404} 675}
405 676
@@ -445,10 +716,9 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
445 pte = dma_addr_level_pte(domain, tmp, level); 716 pte = dma_addr_level_pte(domain, tmp, level);
446 if (pte) { 717 if (pte) {
447 free_pgtable_page( 718 free_pgtable_page(
448 phys_to_virt(dma_pte_addr(*pte))); 719 phys_to_virt(dma_pte_addr(pte)));
449 dma_clear_pte(*pte); 720 dma_clear_pte(pte);
450 __iommu_flush_cache(domain->iommu, 721 domain_flush_cache(domain, pte, sizeof(*pte));
451 pte, sizeof(*pte));
452 } 722 }
453 tmp += level_size(level); 723 tmp += level_size(level);
454 } 724 }
@@ -950,17 +1220,28 @@ static int iommu_init_domains(struct intel_iommu *iommu)
950 1220
951 1221
952static void domain_exit(struct dmar_domain *domain); 1222static void domain_exit(struct dmar_domain *domain);
1223static void vm_domain_exit(struct dmar_domain *domain);
953 1224
954void free_dmar_iommu(struct intel_iommu *iommu) 1225void free_dmar_iommu(struct intel_iommu *iommu)
955{ 1226{
956 struct dmar_domain *domain; 1227 struct dmar_domain *domain;
957 int i; 1228 int i;
1229 unsigned long flags;
958 1230
959 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); 1231 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
960 for (; i < cap_ndoms(iommu->cap); ) { 1232 for (; i < cap_ndoms(iommu->cap); ) {
961 domain = iommu->domains[i]; 1233 domain = iommu->domains[i];
962 clear_bit(i, iommu->domain_ids); 1234 clear_bit(i, iommu->domain_ids);
963 domain_exit(domain); 1235
1236 spin_lock_irqsave(&domain->iommu_lock, flags);
1237 if (--domain->iommu_count == 0) {
1238 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1239 vm_domain_exit(domain);
1240 else
1241 domain_exit(domain);
1242 }
1243 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1244
964 i = find_next_bit(iommu->domain_ids, 1245 i = find_next_bit(iommu->domain_ids,
965 cap_ndoms(iommu->cap), i+1); 1246 cap_ndoms(iommu->cap), i+1);
966 } 1247 }
@@ -978,6 +1259,17 @@ void free_dmar_iommu(struct intel_iommu *iommu)
978 kfree(iommu->domains); 1259 kfree(iommu->domains);
979 kfree(iommu->domain_ids); 1260 kfree(iommu->domain_ids);
980 1261
1262 g_iommus[iommu->seq_id] = NULL;
1263
1264 /* if all iommus are freed, free g_iommus */
1265 for (i = 0; i < g_num_of_iommus; i++) {
1266 if (g_iommus[i])
1267 break;
1268 }
1269
1270 if (i == g_num_of_iommus)
1271 kfree(g_iommus);
1272
981 /* free context mapping */ 1273 /* free context mapping */
982 free_context_table(iommu); 1274 free_context_table(iommu);
983} 1275}
@@ -1006,7 +1298,9 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1006 1298
1007 set_bit(num, iommu->domain_ids); 1299 set_bit(num, iommu->domain_ids);
1008 domain->id = num; 1300 domain->id = num;
1009 domain->iommu = iommu; 1301 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1302 set_bit(iommu->seq_id, &domain->iommu_bmp);
1303 domain->flags = 0;
1010 iommu->domains[num] = domain; 1304 iommu->domains[num] = domain;
1011 spin_unlock_irqrestore(&iommu->lock, flags); 1305 spin_unlock_irqrestore(&iommu->lock, flags);
1012 1306
@@ -1016,10 +1310,13 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1016static void iommu_free_domain(struct dmar_domain *domain) 1310static void iommu_free_domain(struct dmar_domain *domain)
1017{ 1311{
1018 unsigned long flags; 1312 unsigned long flags;
1313 struct intel_iommu *iommu;
1314
1315 iommu = domain_get_iommu(domain);
1019 1316
1020 spin_lock_irqsave(&domain->iommu->lock, flags); 1317 spin_lock_irqsave(&iommu->lock, flags);
1021 clear_bit(domain->id, domain->iommu->domain_ids); 1318 clear_bit(domain->id, iommu->domain_ids);
1022 spin_unlock_irqrestore(&domain->iommu->lock, flags); 1319 spin_unlock_irqrestore(&iommu->lock, flags);
1023} 1320}
1024 1321
1025static struct iova_domain reserved_iova_list; 1322static struct iova_domain reserved_iova_list;
@@ -1094,11 +1391,12 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1094 1391
1095 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 1392 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1096 spin_lock_init(&domain->mapping_lock); 1393 spin_lock_init(&domain->mapping_lock);
1394 spin_lock_init(&domain->iommu_lock);
1097 1395
1098 domain_reserve_special_ranges(domain); 1396 domain_reserve_special_ranges(domain);
1099 1397
1100 /* calculate AGAW */ 1398 /* calculate AGAW */
1101 iommu = domain->iommu; 1399 iommu = domain_get_iommu(domain);
1102 if (guest_width > cap_mgaw(iommu->cap)) 1400 if (guest_width > cap_mgaw(iommu->cap))
1103 guest_width = cap_mgaw(iommu->cap); 1401 guest_width = cap_mgaw(iommu->cap);
1104 domain->gaw = guest_width; 1402 domain->gaw = guest_width;
@@ -1115,6 +1413,13 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1115 domain->agaw = agaw; 1413 domain->agaw = agaw;
1116 INIT_LIST_HEAD(&domain->devices); 1414 INIT_LIST_HEAD(&domain->devices);
1117 1415
1416 if (ecap_coherent(iommu->ecap))
1417 domain->iommu_coherency = 1;
1418 else
1419 domain->iommu_coherency = 0;
1420
1421 domain->iommu_count = 1;
1422
1118 /* always allocate the top pgd */ 1423 /* always allocate the top pgd */
1119 domain->pgd = (struct dma_pte *)alloc_pgtable_page(); 1424 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1120 if (!domain->pgd) 1425 if (!domain->pgd)
@@ -1151,28 +1456,82 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1151 u8 bus, u8 devfn) 1456 u8 bus, u8 devfn)
1152{ 1457{
1153 struct context_entry *context; 1458 struct context_entry *context;
1154 struct intel_iommu *iommu = domain->iommu;
1155 unsigned long flags; 1459 unsigned long flags;
1460 struct intel_iommu *iommu;
1461 struct dma_pte *pgd;
1462 unsigned long num;
1463 unsigned long ndomains;
1464 int id;
1465 int agaw;
1156 1466
1157 pr_debug("Set context mapping for %02x:%02x.%d\n", 1467 pr_debug("Set context mapping for %02x:%02x.%d\n",
1158 bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1468 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1159 BUG_ON(!domain->pgd); 1469 BUG_ON(!domain->pgd);
1470
1471 iommu = device_to_iommu(bus, devfn);
1472 if (!iommu)
1473 return -ENODEV;
1474
1160 context = device_to_context_entry(iommu, bus, devfn); 1475 context = device_to_context_entry(iommu, bus, devfn);
1161 if (!context) 1476 if (!context)
1162 return -ENOMEM; 1477 return -ENOMEM;
1163 spin_lock_irqsave(&iommu->lock, flags); 1478 spin_lock_irqsave(&iommu->lock, flags);
1164 if (context_present(*context)) { 1479 if (context_present(context)) {
1165 spin_unlock_irqrestore(&iommu->lock, flags); 1480 spin_unlock_irqrestore(&iommu->lock, flags);
1166 return 0; 1481 return 0;
1167 } 1482 }
1168 1483
1169 context_set_domain_id(*context, domain->id); 1484 id = domain->id;
1170 context_set_address_width(*context, domain->agaw); 1485 pgd = domain->pgd;
1171 context_set_address_root(*context, virt_to_phys(domain->pgd)); 1486
1172 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); 1487 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
1173 context_set_fault_enable(*context); 1488 int found = 0;
1174 context_set_present(*context); 1489
1175 __iommu_flush_cache(iommu, context, sizeof(*context)); 1490 /* find an available domain id for this device in iommu */
1491 ndomains = cap_ndoms(iommu->cap);
1492 num = find_first_bit(iommu->domain_ids, ndomains);
1493 for (; num < ndomains; ) {
1494 if (iommu->domains[num] == domain) {
1495 id = num;
1496 found = 1;
1497 break;
1498 }
1499 num = find_next_bit(iommu->domain_ids,
1500 cap_ndoms(iommu->cap), num+1);
1501 }
1502
1503 if (found == 0) {
1504 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1505 if (num >= ndomains) {
1506 spin_unlock_irqrestore(&iommu->lock, flags);
1507 printk(KERN_ERR "IOMMU: no free domain ids\n");
1508 return -EFAULT;
1509 }
1510
1511 set_bit(num, iommu->domain_ids);
1512 iommu->domains[num] = domain;
1513 id = num;
1514 }
1515
1516 /* Skip top levels of page tables for
1517 * iommu which has less agaw than default.
1518 */
1519 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1520 pgd = phys_to_virt(dma_pte_addr(pgd));
1521 if (!dma_pte_present(pgd)) {
1522 spin_unlock_irqrestore(&iommu->lock, flags);
1523 return -ENOMEM;
1524 }
1525 }
1526 }
1527
1528 context_set_domain_id(context, id);
1529 context_set_address_width(context, iommu->agaw);
1530 context_set_address_root(context, virt_to_phys(pgd));
1531 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1532 context_set_fault_enable(context);
1533 context_set_present(context);
1534 domain_flush_cache(domain, context, sizeof(*context));
1176 1535
1177 /* it's a non-present to present mapping */ 1536 /* it's a non-present to present mapping */
1178 if (iommu->flush.flush_context(iommu, domain->id, 1537 if (iommu->flush.flush_context(iommu, domain->id,
@@ -1183,6 +1542,13 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1183 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); 1542 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1184 1543
1185 spin_unlock_irqrestore(&iommu->lock, flags); 1544 spin_unlock_irqrestore(&iommu->lock, flags);
1545
1546 spin_lock_irqsave(&domain->iommu_lock, flags);
1547 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1548 domain->iommu_count++;
1549 domain_update_iommu_coherency(domain);
1550 }
1551 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1186 return 0; 1552 return 0;
1187} 1553}
1188 1554
@@ -1218,13 +1584,17 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1218 tmp->bus->number, tmp->devfn); 1584 tmp->bus->number, tmp->devfn);
1219} 1585}
1220 1586
1221static int domain_context_mapped(struct dmar_domain *domain, 1587static int domain_context_mapped(struct pci_dev *pdev)
1222 struct pci_dev *pdev)
1223{ 1588{
1224 int ret; 1589 int ret;
1225 struct pci_dev *tmp, *parent; 1590 struct pci_dev *tmp, *parent;
1591 struct intel_iommu *iommu;
1592
1593 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
1594 if (!iommu)
1595 return -ENODEV;
1226 1596
1227 ret = device_context_mapped(domain->iommu, 1597 ret = device_context_mapped(iommu,
1228 pdev->bus->number, pdev->devfn); 1598 pdev->bus->number, pdev->devfn);
1229 if (!ret) 1599 if (!ret)
1230 return ret; 1600 return ret;
@@ -1235,17 +1605,17 @@ static int domain_context_mapped(struct dmar_domain *domain,
1235 /* Secondary interface's bus number and devfn 0 */ 1605 /* Secondary interface's bus number and devfn 0 */
1236 parent = pdev->bus->self; 1606 parent = pdev->bus->self;
1237 while (parent != tmp) { 1607 while (parent != tmp) {
1238 ret = device_context_mapped(domain->iommu, parent->bus->number, 1608 ret = device_context_mapped(iommu, parent->bus->number,
1239 parent->devfn); 1609 parent->devfn);
1240 if (!ret) 1610 if (!ret)
1241 return ret; 1611 return ret;
1242 parent = parent->bus->self; 1612 parent = parent->bus->self;
1243 } 1613 }
1244 if (tmp->is_pcie) 1614 if (tmp->is_pcie)
1245 return device_context_mapped(domain->iommu, 1615 return device_context_mapped(iommu,
1246 tmp->subordinate->number, 0); 1616 tmp->subordinate->number, 0);
1247 else 1617 else
1248 return device_context_mapped(domain->iommu, 1618 return device_context_mapped(iommu,
1249 tmp->bus->number, tmp->devfn); 1619 tmp->bus->number, tmp->devfn);
1250} 1620}
1251 1621
@@ -1273,22 +1643,25 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1273 /* We don't need lock here, nobody else 1643 /* We don't need lock here, nobody else
1274 * touches the iova range 1644 * touches the iova range
1275 */ 1645 */
1276 BUG_ON(dma_pte_addr(*pte)); 1646 BUG_ON(dma_pte_addr(pte));
1277 dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT); 1647 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1278 dma_set_pte_prot(*pte, prot); 1648 dma_set_pte_prot(pte, prot);
1279 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); 1649 domain_flush_cache(domain, pte, sizeof(*pte));
1280 start_pfn++; 1650 start_pfn++;
1281 index++; 1651 index++;
1282 } 1652 }
1283 return 0; 1653 return 0;
1284} 1654}
1285 1655
1286static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn) 1656static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1287{ 1657{
1288 clear_context_table(domain->iommu, bus, devfn); 1658 if (!iommu)
1289 domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0, 1659 return;
1660
1661 clear_context_table(iommu, bus, devfn);
1662 iommu->flush.flush_context(iommu, 0, 0, 0,
1290 DMA_CCMD_GLOBAL_INVL, 0); 1663 DMA_CCMD_GLOBAL_INVL, 0);
1291 domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0, 1664 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1292 DMA_TLB_GLOBAL_FLUSH, 0); 1665 DMA_TLB_GLOBAL_FLUSH, 0);
1293} 1666}
1294 1667
@@ -1296,6 +1669,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
1296{ 1669{
1297 struct device_domain_info *info; 1670 struct device_domain_info *info;
1298 unsigned long flags; 1671 unsigned long flags;
1672 struct intel_iommu *iommu;
1299 1673
1300 spin_lock_irqsave(&device_domain_lock, flags); 1674 spin_lock_irqsave(&device_domain_lock, flags);
1301 while (!list_empty(&domain->devices)) { 1675 while (!list_empty(&domain->devices)) {
@@ -1307,7 +1681,8 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
1307 info->dev->dev.archdata.iommu = NULL; 1681 info->dev->dev.archdata.iommu = NULL;
1308 spin_unlock_irqrestore(&device_domain_lock, flags); 1682 spin_unlock_irqrestore(&device_domain_lock, flags);
1309 1683
1310 detach_domain_for_dev(info->domain, info->bus, info->devfn); 1684 iommu = device_to_iommu(info->bus, info->devfn);
1685 iommu_detach_dev(iommu, info->bus, info->devfn);
1311 free_devinfo_mem(info); 1686 free_devinfo_mem(info);
1312 1687
1313 spin_lock_irqsave(&device_domain_lock, flags); 1688 spin_lock_irqsave(&device_domain_lock, flags);
@@ -1400,7 +1775,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1400 info->dev = NULL; 1775 info->dev = NULL;
1401 info->domain = domain; 1776 info->domain = domain;
1402 /* This domain is shared by devices under p2p bridge */ 1777 /* This domain is shared by devices under p2p bridge */
1403 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES; 1778 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1404 1779
1405 /* pcie-to-pci bridge already has a domain, uses it */ 1780 /* pcie-to-pci bridge already has a domain, uses it */
1406 found = NULL; 1781 found = NULL;
@@ -1563,6 +1938,11 @@ static void __init iommu_prepare_gfx_mapping(void)
1563 printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); 1938 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1564 } 1939 }
1565} 1940}
1941#else /* !CONFIG_DMAR_GFX_WA */
1942static inline void iommu_prepare_gfx_mapping(void)
1943{
1944 return;
1945}
1566#endif 1946#endif
1567 1947
1568#ifdef CONFIG_DMAR_FLOPPY_WA 1948#ifdef CONFIG_DMAR_FLOPPY_WA
@@ -1590,7 +1970,7 @@ static inline void iommu_prepare_isa(void)
1590} 1970}
 1591#endif /* !CONFIG_DMAR_FLOPPY_WA */ 1971#endif /* !CONFIG_DMAR_FLOPPY_WA */
1592 1972
1593int __init init_dmars(void) 1973static int __init init_dmars(void)
1594{ 1974{
1595 struct dmar_drhd_unit *drhd; 1975 struct dmar_drhd_unit *drhd;
1596 struct dmar_rmrr_unit *rmrr; 1976 struct dmar_rmrr_unit *rmrr;
@@ -1613,9 +1993,18 @@ int __init init_dmars(void)
1613 */ 1993 */
1614 } 1994 }
1615 1995
1996 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1997 GFP_KERNEL);
1998 if (!g_iommus) {
1999 printk(KERN_ERR "Allocating global iommu array failed\n");
2000 ret = -ENOMEM;
2001 goto error;
2002 }
2003
1616 deferred_flush = kzalloc(g_num_of_iommus * 2004 deferred_flush = kzalloc(g_num_of_iommus *
1617 sizeof(struct deferred_flush_tables), GFP_KERNEL); 2005 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1618 if (!deferred_flush) { 2006 if (!deferred_flush) {
2007 kfree(g_iommus);
1619 ret = -ENOMEM; 2008 ret = -ENOMEM;
1620 goto error; 2009 goto error;
1621 } 2010 }
@@ -1625,6 +2014,7 @@ int __init init_dmars(void)
1625 continue; 2014 continue;
1626 2015
1627 iommu = drhd->iommu; 2016 iommu = drhd->iommu;
2017 g_iommus[iommu->seq_id] = iommu;
1628 2018
1629 ret = iommu_init_domains(iommu); 2019 ret = iommu_init_domains(iommu);
1630 if (ret) 2020 if (ret)
@@ -1737,6 +2127,7 @@ error:
1737 iommu = drhd->iommu; 2127 iommu = drhd->iommu;
1738 free_iommu(iommu); 2128 free_iommu(iommu);
1739 } 2129 }
2130 kfree(g_iommus);
1740 return ret; 2131 return ret;
1741} 2132}
1742 2133
@@ -1805,7 +2196,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
1805 } 2196 }
1806 2197
1807 /* make sure context mapping is ok */ 2198 /* make sure context mapping is ok */
1808 if (unlikely(!domain_context_mapped(domain, pdev))) { 2199 if (unlikely(!domain_context_mapped(pdev))) {
1809 ret = domain_context_mapping(domain, pdev); 2200 ret = domain_context_mapping(domain, pdev);
1810 if (ret) { 2201 if (ret) {
1811 printk(KERN_ERR 2202 printk(KERN_ERR
@@ -1827,6 +2218,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
1827 struct iova *iova; 2218 struct iova *iova;
1828 int prot = 0; 2219 int prot = 0;
1829 int ret; 2220 int ret;
2221 struct intel_iommu *iommu;
1830 2222
1831 BUG_ON(dir == DMA_NONE); 2223 BUG_ON(dir == DMA_NONE);
1832 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2224 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
@@ -1836,6 +2228,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
1836 if (!domain) 2228 if (!domain)
1837 return 0; 2229 return 0;
1838 2230
2231 iommu = domain_get_iommu(domain);
1839 size = aligned_size((u64)paddr, size); 2232 size = aligned_size((u64)paddr, size);
1840 2233
1841 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2234 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
@@ -1849,7 +2242,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
1849 * mappings.. 2242 * mappings..
1850 */ 2243 */
1851 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 2244 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1852 !cap_zlr(domain->iommu->cap)) 2245 !cap_zlr(iommu->cap))
1853 prot |= DMA_PTE_READ; 2246 prot |= DMA_PTE_READ;
1854 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2247 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1855 prot |= DMA_PTE_WRITE; 2248 prot |= DMA_PTE_WRITE;
@@ -1865,10 +2258,10 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
1865 goto error; 2258 goto error;
1866 2259
1867 /* it's a non-present to present mapping */ 2260 /* it's a non-present to present mapping */
1868 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id, 2261 ret = iommu_flush_iotlb_psi(iommu, domain->id,
1869 start_paddr, size >> VTD_PAGE_SHIFT, 1); 2262 start_paddr, size >> VTD_PAGE_SHIFT, 1);
1870 if (ret) 2263 if (ret)
1871 iommu_flush_write_buffer(domain->iommu); 2264 iommu_flush_write_buffer(iommu);
1872 2265
1873 return start_paddr + ((u64)paddr & (~PAGE_MASK)); 2266 return start_paddr + ((u64)paddr & (~PAGE_MASK));
1874 2267
@@ -1895,10 +2288,11 @@ static void flush_unmaps(void)
1895 2288
1896 /* just flush them all */ 2289 /* just flush them all */
1897 for (i = 0; i < g_num_of_iommus; i++) { 2290 for (i = 0; i < g_num_of_iommus; i++) {
1898 if (deferred_flush[i].next) { 2291 struct intel_iommu *iommu = g_iommus[i];
1899 struct intel_iommu *iommu = 2292 if (!iommu)
1900 deferred_flush[i].domain[0]->iommu; 2293 continue;
1901 2294
2295 if (deferred_flush[i].next) {
1902 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2296 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1903 DMA_TLB_GLOBAL_FLUSH, 0); 2297 DMA_TLB_GLOBAL_FLUSH, 0);
1904 for (j = 0; j < deferred_flush[i].next; j++) { 2298 for (j = 0; j < deferred_flush[i].next; j++) {
@@ -1925,12 +2319,14 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
1925{ 2319{
1926 unsigned long flags; 2320 unsigned long flags;
1927 int next, iommu_id; 2321 int next, iommu_id;
2322 struct intel_iommu *iommu;
1928 2323
1929 spin_lock_irqsave(&async_umap_flush_lock, flags); 2324 spin_lock_irqsave(&async_umap_flush_lock, flags);
1930 if (list_size == HIGH_WATER_MARK) 2325 if (list_size == HIGH_WATER_MARK)
1931 flush_unmaps(); 2326 flush_unmaps();
1932 2327
1933 iommu_id = dom->iommu->seq_id; 2328 iommu = domain_get_iommu(dom);
2329 iommu_id = iommu->seq_id;
1934 2330
1935 next = deferred_flush[iommu_id].next; 2331 next = deferred_flush[iommu_id].next;
1936 deferred_flush[iommu_id].domain[next] = dom; 2332 deferred_flush[iommu_id].domain[next] = dom;
@@ -1952,12 +2348,15 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
1952 struct dmar_domain *domain; 2348 struct dmar_domain *domain;
1953 unsigned long start_addr; 2349 unsigned long start_addr;
1954 struct iova *iova; 2350 struct iova *iova;
2351 struct intel_iommu *iommu;
1955 2352
1956 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2353 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1957 return; 2354 return;
1958 domain = find_domain(pdev); 2355 domain = find_domain(pdev);
1959 BUG_ON(!domain); 2356 BUG_ON(!domain);
1960 2357
2358 iommu = domain_get_iommu(domain);
2359
1961 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); 2360 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1962 if (!iova) 2361 if (!iova)
1963 return; 2362 return;
@@ -1973,9 +2372,9 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
1973 /* free page tables */ 2372 /* free page tables */
1974 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2373 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1975 if (intel_iommu_strict) { 2374 if (intel_iommu_strict) {
1976 if (iommu_flush_iotlb_psi(domain->iommu, 2375 if (iommu_flush_iotlb_psi(iommu,
1977 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) 2376 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
1978 iommu_flush_write_buffer(domain->iommu); 2377 iommu_flush_write_buffer(iommu);
1979 /* free iova */ 2378 /* free iova */
1980 __free_iova(&domain->iovad, iova); 2379 __free_iova(&domain->iovad, iova);
1981 } else { 2380 } else {
@@ -2036,11 +2435,15 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2036 size_t size = 0; 2435 size_t size = 0;
2037 void *addr; 2436 void *addr;
2038 struct scatterlist *sg; 2437 struct scatterlist *sg;
2438 struct intel_iommu *iommu;
2039 2439
2040 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2440 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2041 return; 2441 return;
2042 2442
2043 domain = find_domain(pdev); 2443 domain = find_domain(pdev);
2444 BUG_ON(!domain);
2445
2446 iommu = domain_get_iommu(domain);
2044 2447
2045 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); 2448 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2046 if (!iova) 2449 if (!iova)
@@ -2057,9 +2460,9 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2057 /* free page tables */ 2460 /* free page tables */
2058 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2461 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2059 2462
2060 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, 2463 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2061 size >> VTD_PAGE_SHIFT, 0)) 2464 size >> VTD_PAGE_SHIFT, 0))
2062 iommu_flush_write_buffer(domain->iommu); 2465 iommu_flush_write_buffer(iommu);
2063 2466
2064 /* free iova */ 2467 /* free iova */
2065 __free_iova(&domain->iovad, iova); 2468 __free_iova(&domain->iovad, iova);
@@ -2093,6 +2496,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2093 int ret; 2496 int ret;
2094 struct scatterlist *sg; 2497 struct scatterlist *sg;
2095 unsigned long start_addr; 2498 unsigned long start_addr;
2499 struct intel_iommu *iommu;
2096 2500
2097 BUG_ON(dir == DMA_NONE); 2501 BUG_ON(dir == DMA_NONE);
2098 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2502 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
@@ -2102,6 +2506,8 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2102 if (!domain) 2506 if (!domain)
2103 return 0; 2507 return 0;
2104 2508
2509 iommu = domain_get_iommu(domain);
2510
2105 for_each_sg(sglist, sg, nelems, i) { 2511 for_each_sg(sglist, sg, nelems, i) {
2106 addr = SG_ENT_VIRT_ADDRESS(sg); 2512 addr = SG_ENT_VIRT_ADDRESS(sg);
2107 addr = (void *)virt_to_phys(addr); 2513 addr = (void *)virt_to_phys(addr);
@@ -2119,7 +2525,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2119 * mappings.. 2525 * mappings..
2120 */ 2526 */
2121 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 2527 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2122 !cap_zlr(domain->iommu->cap)) 2528 !cap_zlr(iommu->cap))
2123 prot |= DMA_PTE_READ; 2529 prot |= DMA_PTE_READ;
2124 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2530 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2125 prot |= DMA_PTE_WRITE; 2531 prot |= DMA_PTE_WRITE;
@@ -2151,9 +2557,9 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2151 } 2557 }
2152 2558
2153 /* it's a non-present to present mapping */ 2559 /* it's a non-present to present mapping */
2154 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, 2560 if (iommu_flush_iotlb_psi(iommu, domain->id,
2155 start_addr, offset >> VTD_PAGE_SHIFT, 1)) 2561 start_addr, offset >> VTD_PAGE_SHIFT, 1))
2156 iommu_flush_write_buffer(domain->iommu); 2562 iommu_flush_write_buffer(iommu);
2157 return nelems; 2563 return nelems;
2158} 2564}
2159 2565
@@ -2325,10 +2731,220 @@ int __init intel_iommu_init(void)
2325 init_timer(&unmap_timer); 2731 init_timer(&unmap_timer);
2326 force_iommu = 1; 2732 force_iommu = 1;
2327 dma_ops = &intel_dma_ops; 2733 dma_ops = &intel_dma_ops;
2734
2735 register_iommu(&intel_iommu_ops);
2736
2737 return 0;
2738}
2739
2740static int vm_domain_add_dev_info(struct dmar_domain *domain,
2741 struct pci_dev *pdev)
2742{
2743 struct device_domain_info *info;
2744 unsigned long flags;
2745
2746 info = alloc_devinfo_mem();
2747 if (!info)
2748 return -ENOMEM;
2749
2750 info->bus = pdev->bus->number;
2751 info->devfn = pdev->devfn;
2752 info->dev = pdev;
2753 info->domain = domain;
2754
2755 spin_lock_irqsave(&device_domain_lock, flags);
2756 list_add(&info->link, &domain->devices);
2757 list_add(&info->global, &device_domain_list);
2758 pdev->dev.archdata.iommu = info;
2759 spin_unlock_irqrestore(&device_domain_lock, flags);
2760
2761 return 0;
2762}
2763
2764static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2765 struct pci_dev *pdev)
2766{
2767 struct device_domain_info *info;
2768 struct intel_iommu *iommu;
2769 unsigned long flags;
2770 int found = 0;
2771 struct list_head *entry, *tmp;
2772
2773 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
2774 if (!iommu)
2775 return;
2776
2777 spin_lock_irqsave(&device_domain_lock, flags);
2778 list_for_each_safe(entry, tmp, &domain->devices) {
2779 info = list_entry(entry, struct device_domain_info, link);
2780 if (info->bus == pdev->bus->number &&
2781 info->devfn == pdev->devfn) {
2782 list_del(&info->link);
2783 list_del(&info->global);
2784 if (info->dev)
2785 info->dev->dev.archdata.iommu = NULL;
2786 spin_unlock_irqrestore(&device_domain_lock, flags);
2787
2788 iommu_detach_dev(iommu, info->bus, info->devfn);
2789 free_devinfo_mem(info);
2790
2791 spin_lock_irqsave(&device_domain_lock, flags);
2792
2793 if (found)
2794 break;
2795 else
2796 continue;
2797 }
2798
 2799 /* if no other device under the same iommu is
 2800 * owned by this domain, clear this iommu in iommu_bmp,
 2801 * update the iommu count and coherency
2802 */
2803 if (device_to_iommu(info->bus, info->devfn) == iommu)
2804 found = 1;
2805 }
2806
2807 if (found == 0) {
2808 unsigned long tmp_flags;
2809 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
2810 clear_bit(iommu->seq_id, &domain->iommu_bmp);
2811 domain->iommu_count--;
2812 domain_update_iommu_coherency(domain);
2813 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
2814 }
2815
2816 spin_unlock_irqrestore(&device_domain_lock, flags);
2817}
2818
2819static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2820{
2821 struct device_domain_info *info;
2822 struct intel_iommu *iommu;
2823 unsigned long flags1, flags2;
2824
2825 spin_lock_irqsave(&device_domain_lock, flags1);
2826 while (!list_empty(&domain->devices)) {
2827 info = list_entry(domain->devices.next,
2828 struct device_domain_info, link);
2829 list_del(&info->link);
2830 list_del(&info->global);
2831 if (info->dev)
2832 info->dev->dev.archdata.iommu = NULL;
2833
2834 spin_unlock_irqrestore(&device_domain_lock, flags1);
2835
2836 iommu = device_to_iommu(info->bus, info->devfn);
2837 iommu_detach_dev(iommu, info->bus, info->devfn);
2838
2839 /* clear this iommu in iommu_bmp, update iommu count
2840 * and coherency
2841 */
2842 spin_lock_irqsave(&domain->iommu_lock, flags2);
2843 if (test_and_clear_bit(iommu->seq_id,
2844 &domain->iommu_bmp)) {
2845 domain->iommu_count--;
2846 domain_update_iommu_coherency(domain);
2847 }
2848 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2849
2850 free_devinfo_mem(info);
2851 spin_lock_irqsave(&device_domain_lock, flags1);
2852 }
2853 spin_unlock_irqrestore(&device_domain_lock, flags1);
2854}
2855
2856/* domain id for virtual machine, it won't be set in context */
2857static unsigned long vm_domid;
2858
2859static int vm_domain_min_agaw(struct dmar_domain *domain)
2860{
2861 int i;
2862 int min_agaw = domain->agaw;
2863
2864 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
2865 for (; i < g_num_of_iommus; ) {
2866 if (min_agaw > g_iommus[i]->agaw)
2867 min_agaw = g_iommus[i]->agaw;
2868
2869 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
2870 }
2871
2872 return min_agaw;
2873}
2874
2875static struct dmar_domain *iommu_alloc_vm_domain(void)
2876{
2877 struct dmar_domain *domain;
2878
2879 domain = alloc_domain_mem();
2880 if (!domain)
2881 return NULL;
2882
2883 domain->id = vm_domid++;
2884 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
2885 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
2886
2887 return domain;
2888}
2889
2890static int vm_domain_init(struct dmar_domain *domain, int guest_width)
2891{
2892 int adjust_width;
2893
2894 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
2895 spin_lock_init(&domain->mapping_lock);
2896 spin_lock_init(&domain->iommu_lock);
2897
2898 domain_reserve_special_ranges(domain);
2899
2900 /* calculate AGAW */
2901 domain->gaw = guest_width;
2902 adjust_width = guestwidth_to_adjustwidth(guest_width);
2903 domain->agaw = width_to_agaw(adjust_width);
2904
2905 INIT_LIST_HEAD(&domain->devices);
2906
2907 domain->iommu_count = 0;
2908 domain->iommu_coherency = 0;
2909 domain->max_addr = 0;
2910
2911 /* always allocate the top pgd */
2912 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
2913 if (!domain->pgd)
2914 return -ENOMEM;
2915 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
2328 return 0; 2916 return 0;
2329} 2917}
2330 2918
2331void intel_iommu_domain_exit(struct dmar_domain *domain) 2919static void iommu_free_vm_domain(struct dmar_domain *domain)
2920{
2921 unsigned long flags;
2922 struct dmar_drhd_unit *drhd;
2923 struct intel_iommu *iommu;
2924 unsigned long i;
2925 unsigned long ndomains;
2926
2927 for_each_drhd_unit(drhd) {
2928 if (drhd->ignored)
2929 continue;
2930 iommu = drhd->iommu;
2931
2932 ndomains = cap_ndoms(iommu->cap);
2933 i = find_first_bit(iommu->domain_ids, ndomains);
2934 for (; i < ndomains; ) {
2935 if (iommu->domains[i] == domain) {
2936 spin_lock_irqsave(&iommu->lock, flags);
2937 clear_bit(i, iommu->domain_ids);
2938 iommu->domains[i] = NULL;
2939 spin_unlock_irqrestore(&iommu->lock, flags);
2940 break;
2941 }
2942 i = find_next_bit(iommu->domain_ids, ndomains, i+1);
2943 }
2944 }
2945}
2946
2947static void vm_domain_exit(struct dmar_domain *domain)
2332{ 2948{
2333 u64 end; 2949 u64 end;
2334 2950
@@ -2336,6 +2952,9 @@ void intel_iommu_domain_exit(struct dmar_domain *domain)
2336 if (!domain) 2952 if (!domain)
2337 return; 2953 return;
2338 2954
2955 vm_domain_remove_all_dev_info(domain);
2956 /* destroy iovas */
2957 put_iova_domain(&domain->iovad);
2339 end = DOMAIN_MAX_ADDR(domain->gaw); 2958 end = DOMAIN_MAX_ADDR(domain->gaw);
2340 end = end & (~VTD_PAGE_MASK); 2959 end = end & (~VTD_PAGE_MASK);
2341 2960
@@ -2345,94 +2964,167 @@ void intel_iommu_domain_exit(struct dmar_domain *domain)
2345 /* free page tables */ 2964 /* free page tables */
2346 dma_pte_free_pagetable(domain, 0, end); 2965 dma_pte_free_pagetable(domain, 0, end);
2347 2966
2348 iommu_free_domain(domain); 2967 iommu_free_vm_domain(domain);
2349 free_domain_mem(domain); 2968 free_domain_mem(domain);
2350} 2969}
2351EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2352 2970
2353struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev) 2971static int intel_iommu_domain_init(struct iommu_domain *domain)
2354{ 2972{
2355 struct dmar_drhd_unit *drhd; 2973 struct dmar_domain *dmar_domain;
2356 struct dmar_domain *domain;
2357 struct intel_iommu *iommu;
2358
2359 drhd = dmar_find_matched_drhd_unit(pdev);
2360 if (!drhd) {
2361 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2362 return NULL;
2363 }
2364 2974
2365 iommu = drhd->iommu; 2975 dmar_domain = iommu_alloc_vm_domain();
2366 if (!iommu) { 2976 if (!dmar_domain) {
2367 printk(KERN_ERR
2368 "intel_iommu_domain_alloc: iommu == NULL\n");
2369 return NULL;
2370 }
2371 domain = iommu_alloc_domain(iommu);
2372 if (!domain) {
2373 printk(KERN_ERR 2977 printk(KERN_ERR
2374 "intel_iommu_domain_alloc: domain == NULL\n"); 2978 "intel_iommu_domain_init: dmar_domain == NULL\n");
2375 return NULL; 2979 return -ENOMEM;
2376 } 2980 }
2377 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 2981 if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2378 printk(KERN_ERR 2982 printk(KERN_ERR
2379 "intel_iommu_domain_alloc: domain_init() failed\n"); 2983 "intel_iommu_domain_init() failed\n");
2380 intel_iommu_domain_exit(domain); 2984 vm_domain_exit(dmar_domain);
2381 return NULL; 2985 return -ENOMEM;
2382 } 2986 }
2383 return domain; 2987 domain->priv = dmar_domain;
2988
2989 return 0;
2384} 2990}
2385EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2386 2991
2387int intel_iommu_context_mapping( 2992static void intel_iommu_domain_destroy(struct iommu_domain *domain)
2388 struct dmar_domain *domain, struct pci_dev *pdev)
2389{ 2993{
2390 int rc; 2994 struct dmar_domain *dmar_domain = domain->priv;
2391 rc = domain_context_mapping(domain, pdev); 2995
2392 return rc; 2996 domain->priv = NULL;
2997 vm_domain_exit(dmar_domain);
2393} 2998}
2394EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2395 2999
2396int intel_iommu_page_mapping( 3000static int intel_iommu_attach_device(struct iommu_domain *domain,
2397 struct dmar_domain *domain, dma_addr_t iova, 3001 struct device *dev)
2398 u64 hpa, size_t size, int prot)
2399{ 3002{
2400 int rc; 3003 struct dmar_domain *dmar_domain = domain->priv;
2401 rc = domain_page_mapping(domain, iova, hpa, size, prot); 3004 struct pci_dev *pdev = to_pci_dev(dev);
2402 return rc; 3005 struct intel_iommu *iommu;
3006 int addr_width;
3007 u64 end;
3008 int ret;
3009
3010 /* normally pdev is not mapped */
3011 if (unlikely(domain_context_mapped(pdev))) {
3012 struct dmar_domain *old_domain;
3013
3014 old_domain = find_domain(pdev);
3015 if (old_domain) {
3016 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
3017 vm_domain_remove_one_dev_info(old_domain, pdev);
3018 else
3019 domain_remove_dev_info(old_domain);
3020 }
3021 }
3022
3023 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
3024 if (!iommu)
3025 return -ENODEV;
3026
3027 /* check if this iommu agaw is sufficient for max mapped address */
3028 addr_width = agaw_to_width(iommu->agaw);
3029 end = DOMAIN_MAX_ADDR(addr_width);
3030 end = end & VTD_PAGE_MASK;
3031 if (end < dmar_domain->max_addr) {
3032 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3033 "sufficient for the mapped address (%llx)\n",
3034 __func__, iommu->agaw, dmar_domain->max_addr);
3035 return -EFAULT;
3036 }
3037
3038 ret = domain_context_mapping(dmar_domain, pdev);
3039 if (ret)
3040 return ret;
3041
3042 ret = vm_domain_add_dev_info(dmar_domain, pdev);
3043 return ret;
2403} 3044}
2404EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2405 3045
2406void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn) 3046static void intel_iommu_detach_device(struct iommu_domain *domain,
3047 struct device *dev)
2407{ 3048{
2408 detach_domain_for_dev(domain, bus, devfn); 3049 struct dmar_domain *dmar_domain = domain->priv;
3050 struct pci_dev *pdev = to_pci_dev(dev);
3051
3052 vm_domain_remove_one_dev_info(dmar_domain, pdev);
2409} 3053}
2410EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2411 3054
2412struct dmar_domain * 3055static int intel_iommu_map_range(struct iommu_domain *domain,
2413intel_iommu_find_domain(struct pci_dev *pdev) 3056 unsigned long iova, phys_addr_t hpa,
3057 size_t size, int iommu_prot)
2414{ 3058{
2415 return find_domain(pdev); 3059 struct dmar_domain *dmar_domain = domain->priv;
3060 u64 max_addr;
3061 int addr_width;
3062 int prot = 0;
3063 int ret;
3064
3065 if (iommu_prot & IOMMU_READ)
3066 prot |= DMA_PTE_READ;
3067 if (iommu_prot & IOMMU_WRITE)
3068 prot |= DMA_PTE_WRITE;
3069
3070 max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
3071 if (dmar_domain->max_addr < max_addr) {
3072 int min_agaw;
3073 u64 end;
3074
3075 /* check if minimum agaw is sufficient for mapped address */
3076 min_agaw = vm_domain_min_agaw(dmar_domain);
3077 addr_width = agaw_to_width(min_agaw);
3078 end = DOMAIN_MAX_ADDR(addr_width);
3079 end = end & VTD_PAGE_MASK;
3080 if (end < max_addr) {
3081 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3082 "sufficient for the mapped address (%llx)\n",
3083 __func__, min_agaw, max_addr);
3084 return -EFAULT;
3085 }
3086 dmar_domain->max_addr = max_addr;
3087 }
3088
3089 ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
3090 return ret;
2416} 3091}
2417EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2418 3092
2419int intel_iommu_found(void) 3093static void intel_iommu_unmap_range(struct iommu_domain *domain,
3094 unsigned long iova, size_t size)
2420{ 3095{
2421 return g_num_of_iommus; 3096 struct dmar_domain *dmar_domain = domain->priv;
3097 dma_addr_t base;
3098
3099 /* The address might not be aligned */
3100 base = iova & VTD_PAGE_MASK;
3101 size = VTD_PAGE_ALIGN(size);
3102 dma_pte_clear_range(dmar_domain, base, base + size);
3103
3104 if (dmar_domain->max_addr == base + size)
3105 dmar_domain->max_addr = base;
2422} 3106}
2423EXPORT_SYMBOL_GPL(intel_iommu_found);
2424 3107
2425u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova) 3108static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3109 unsigned long iova)
2426{ 3110{
3111 struct dmar_domain *dmar_domain = domain->priv;
2427 struct dma_pte *pte; 3112 struct dma_pte *pte;
2428 u64 pfn; 3113 u64 phys = 0;
2429
2430 pfn = 0;
2431 pte = addr_to_dma_pte(domain, iova);
2432 3114
3115 pte = addr_to_dma_pte(dmar_domain, iova);
2433 if (pte) 3116 if (pte)
2434 pfn = dma_pte_addr(*pte); 3117 phys = dma_pte_addr(pte);
2435 3118
2436 return pfn >> VTD_PAGE_SHIFT; 3119 return phys;
2437} 3120}
2438EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn); 3121
3122static struct iommu_ops intel_iommu_ops = {
3123 .domain_init = intel_iommu_domain_init,
3124 .domain_destroy = intel_iommu_domain_destroy,
3125 .attach_dev = intel_iommu_attach_device,
3126 .detach_dev = intel_iommu_detach_device,
3127 .map = intel_iommu_map_range,
3128 .unmap = intel_iommu_unmap_range,
3129 .iova_to_phys = intel_iommu_iova_to_phys,
3130};
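With intel_iommu_ops registered through register_iommu(), consumers such as KVM's virt/kvm/iommu.c stop calling the intel_iommu_* exports and go through the generic layer declared in the new include/linux/iommu.h. A hedged sketch of that usage, assuming the iommu_found()/iommu_domain_alloc()/iommu_attach_device()/iommu_map_range() entry points introduced alongside this patch (the function name, parameters, and error handling below are illustrative of the API, not copied from the KVM code):

#include <linux/pci.h>
#include <linux/iommu.h>

/* illustrative consumer of the generic IOMMU API; not part of this patch */
static int example_assign_device(struct pci_dev *pdev, u64 gpa, u64 hpa)
{
	struct iommu_domain *domain;
	int ret;

	if (!iommu_found())
		return -ENODEV;

	domain = iommu_domain_alloc();
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, &pdev->dev);
	if (ret)
		goto out_free;

	/* map one guest-physical page to a host-physical page, read/write */
	ret = iommu_map_range(domain, gpa, hpa, PAGE_SIZE,
			      IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	return 0;

out_detach:
	iommu_detach_device(domain, &pdev->dev);
out_free:
	iommu_domain_free(domain);
	return ret;
}

Whichever hardware driver registered its iommu_ops services these calls, which is what allows KVM's device-assignment path to stay hardware-neutral.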
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 952df39c989d..136f170cecc2 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -9,148 +9,16 @@
9#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) 9#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
10#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) 10#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
11 11
12#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
13#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
14#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
15
16
17/*
18 * 0: Present
19 * 1-11: Reserved
20 * 12-63: Context Ptr (12 - (haw-1))
21 * 64-127: Reserved
22 */
23struct root_entry {
24 u64 val;
25 u64 rsvd1;
26};
27#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
28static inline bool root_present(struct root_entry *root)
29{
30 return (root->val & 1);
31}
32static inline void set_root_present(struct root_entry *root)
33{
34 root->val |= 1;
35}
36static inline void set_root_value(struct root_entry *root, unsigned long value)
37{
38 root->val |= value & VTD_PAGE_MASK;
39}
40
41struct context_entry;
42static inline struct context_entry *
43get_context_addr_from_root(struct root_entry *root)
44{
45 return (struct context_entry *)
46 (root_present(root)?phys_to_virt(
47 root->val & VTD_PAGE_MASK) :
48 NULL);
49}
50
51/*
52 * low 64 bits:
53 * 0: present
54 * 1: fault processing disable
55 * 2-3: translation type
56 * 12-63: address space root
57 * high 64 bits:
58 * 0-2: address width
59 * 3-6: aval
60 * 8-23: domain id
61 */
62struct context_entry {
63 u64 lo;
64 u64 hi;
65};
66#define context_present(c) ((c).lo & 1)
67#define context_fault_disable(c) (((c).lo >> 1) & 1)
68#define context_translation_type(c) (((c).lo >> 2) & 3)
69#define context_address_root(c) ((c).lo & VTD_PAGE_MASK)
70#define context_address_width(c) ((c).hi & 7)
71#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
72
73#define context_set_present(c) do {(c).lo |= 1;} while (0)
74#define context_set_fault_enable(c) \
75 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
76#define context_set_translation_type(c, val) \
77 do { \
78 (c).lo &= (((u64)-1) << 4) | 3; \
79 (c).lo |= ((val) & 3) << 2; \
80 } while (0)
81#define CONTEXT_TT_MULTI_LEVEL 0
82#define context_set_address_root(c, val) \
83 do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0)
84#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
85#define context_set_domain_id(c, val) \
86 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
87#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
88
89/*
90 * 0: readable
91 * 1: writable
92 * 2-6: reserved
93 * 7: super page
94 * 8-11: available
95 * 12-63: Host physcial address
96 */
97struct dma_pte {
98 u64 val;
99};
100#define dma_clear_pte(p) do {(p).val = 0;} while (0)
101
102#define DMA_PTE_READ (1) 12#define DMA_PTE_READ (1)
103#define DMA_PTE_WRITE (2) 13#define DMA_PTE_WRITE (2)
104 14
105#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
106#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
107#define dma_set_pte_prot(p, prot) \
108 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
109#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK)
110#define dma_set_pte_addr(p, addr) do {\
111 (p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
112#define dma_pte_present(p) (((p).val & 3) != 0)
113
114struct intel_iommu; 15struct intel_iommu;
16struct dmar_domain;
17struct root_entry;
115 18
116struct dmar_domain {
117 int id; /* domain id */
118 struct intel_iommu *iommu; /* back pointer to owning iommu */
119
120 struct list_head devices; /* all devices' list */
121 struct iova_domain iovad; /* iova's that belong to this domain */
122
123 struct dma_pte *pgd; /* virtual address */
124 spinlock_t mapping_lock; /* page table lock */
125 int gaw; /* max guest address width */
126
127 /* adjusted guest address width, 0 is level 2 30-bit */
128 int agaw;
129
130#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
131 int flags;
132};
133
134/* PCI domain-device relationship */
135struct device_domain_info {
136 struct list_head link; /* link to domain siblings */
137 struct list_head global; /* link to global list */
138 u8 bus; /* PCI bus numer */
139 u8 devfn; /* PCI devfn number */
140 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
141 struct dmar_domain *domain; /* pointer to domain */
142};
143
144extern int init_dmars(void);
145extern void free_dmar_iommu(struct intel_iommu *iommu); 19extern void free_dmar_iommu(struct intel_iommu *iommu);
20extern int iommu_calculate_agaw(struct intel_iommu *iommu);
146 21
147extern int dmar_disabled; 22extern int dmar_disabled;
148 23
149#ifndef CONFIG_DMAR_GFX_WA
150static inline void iommu_prepare_gfx_mapping(void)
151{
152 return;
153}
154#endif /* !CONFIG_DMAR_GFX_WA */
155
156#endif 24#endif
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index f1984fc3e06d..f28440784cf0 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -144,7 +144,6 @@ struct dmar_rmrr_unit {
144 list_for_each_entry(rmrr, &dmar_rmrr_units, list) 144 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
145/* Intel DMAR initialization functions */ 145/* Intel DMAR initialization functions */
146extern int intel_iommu_init(void); 146extern int intel_iommu_init(void);
147extern int dmar_disabled;
148#else 147#else
149static inline int intel_iommu_init(void) 148static inline int intel_iommu_init(void)
150{ 149{
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3d017cfd245b..c4f6c101dbcd 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -23,8 +23,6 @@
23#define _INTEL_IOMMU_H_ 23#define _INTEL_IOMMU_H_
24 24
25#include <linux/types.h> 25#include <linux/types.h>
26#include <linux/msi.h>
27#include <linux/sysdev.h>
28#include <linux/iova.h> 26#include <linux/iova.h>
29#include <linux/io.h> 27#include <linux/io.h>
30#include <linux/dma_remapping.h> 28#include <linux/dma_remapping.h>
@@ -289,10 +287,10 @@ struct intel_iommu {
289 void __iomem *reg; /* Pointer to hardware regs, virtual addr */ 287 void __iomem *reg; /* Pointer to hardware regs, virtual addr */
290 u64 cap; 288 u64 cap;
291 u64 ecap; 289 u64 ecap;
292 int seg;
293 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ 290 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
294 spinlock_t register_lock; /* protect register handling */ 291 spinlock_t register_lock; /* protect register handling */
295 int seq_id; /* sequence id of the iommu */ 292 int seq_id; /* sequence id of the iommu */
293 int agaw; /* agaw of this iommu */
296 294
297#ifdef CONFIG_DMAR 295#ifdef CONFIG_DMAR
298 unsigned long *domain_ids; /* bitmap of domains */ 296 unsigned long *domain_ids; /* bitmap of domains */
@@ -302,8 +300,6 @@ struct intel_iommu {
302 300
303 unsigned int irq; 301 unsigned int irq;
304 unsigned char name[7]; /* Device Name */ 302 unsigned char name[7]; /* Device Name */
305 struct msi_msg saved_msg;
306 struct sys_device sysdev;
307 struct iommu_flush flush; 303 struct iommu_flush flush;
308#endif 304#endif
309 struct q_inval *qi; /* Queued invalidation info */ 305 struct q_inval *qi; /* Queued invalidation info */
@@ -334,25 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
334 330
335extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); 331extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
336 332
337void intel_iommu_domain_exit(struct dmar_domain *domain);
338struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
339int intel_iommu_context_mapping(struct dmar_domain *domain,
340 struct pci_dev *pdev);
341int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
342 u64 hpa, size_t size, int prot);
343void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
344struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
345u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
346
347#ifdef CONFIG_DMAR
348int intel_iommu_found(void);
349#else /* CONFIG_DMAR */
350static inline int intel_iommu_found(void)
351{
352 return 0;
353}
354#endif /* CONFIG_DMAR */
355
356extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); 333extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t);
357extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t); 334extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t);
358extern dma_addr_t intel_map_single(struct device *, phys_addr_t, size_t, int); 335extern dma_addr_t intel_map_single(struct device *, phys_addr_t, size_t, int);
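
The VT-d specific entry points removed here (intel_iommu_domain_alloc(), intel_iommu_page_mapping(), intel_iommu_found() and friends) are superseded by the generic API added in the new include/linux/iommu.h below: a hardware driver fills a struct iommu_ops and hands it to register_iommu(). A minimal sketch of such a registration, with hypothetical names (my_iommu_ops, my_domain_init, my_iommu_setup); the real VT-d callbacks are not reproduced here.

	#include <linux/init.h>
	#include <linux/iommu.h>

	/* Hypothetical callback: per-domain setup. */
	static int my_domain_init(struct iommu_domain *domain)
	{
		domain->priv = NULL;	/* driver-private domain state goes here */
		return 0;
	}

	static struct iommu_ops my_iommu_ops = {
		.domain_init	= my_domain_init,
		/* .domain_destroy, .attach_dev, .detach_dev,
		 * .map, .unmap and .iova_to_phys would be filled in likewise */
	};

	static int __init my_iommu_setup(void)
	{
		/* presumably what makes iommu_found() return true */
		register_iommu(&my_iommu_ops);
		return 0;
	}
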
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
new file mode 100644
index 000000000000..8a7bfb1b6ca0
--- /dev/null
+++ b/include/linux/iommu.h
@@ -0,0 +1,112 @@
1/*
2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published
7 * by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef __LINUX_IOMMU_H
20#define __LINUX_IOMMU_H
21
22#define IOMMU_READ (1)
23#define IOMMU_WRITE (2)
24
25struct device;
26
27struct iommu_domain {
28 void *priv;
29};
30
31struct iommu_ops {
32 int (*domain_init)(struct iommu_domain *domain);
33 void (*domain_destroy)(struct iommu_domain *domain);
34 int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
35 void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
36 int (*map)(struct iommu_domain *domain, unsigned long iova,
37 phys_addr_t paddr, size_t size, int prot);
38 void (*unmap)(struct iommu_domain *domain, unsigned long iova,
39 size_t size);
40 phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
41 unsigned long iova);
42};
43
44#ifdef CONFIG_IOMMU_API
45
46extern void register_iommu(struct iommu_ops *ops);
47extern bool iommu_found(void);
48extern struct iommu_domain *iommu_domain_alloc(void);
49extern void iommu_domain_free(struct iommu_domain *domain);
50extern int iommu_attach_device(struct iommu_domain *domain,
51 struct device *dev);
52extern void iommu_detach_device(struct iommu_domain *domain,
53 struct device *dev);
54extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
55 phys_addr_t paddr, size_t size, int prot);
56extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
57 size_t size);
58extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
59 unsigned long iova);
60
61#else /* CONFIG_IOMMU_API */
62
63static inline void register_iommu(struct iommu_ops *ops)
64{
65}
66
67static inline bool iommu_found(void)
68{
69 return false;
70}
71
72static inline struct iommu_domain *iommu_domain_alloc(void)
73{
74 return NULL;
75}
76
77static inline void iommu_domain_free(struct iommu_domain *domain)
78{
79}
80
81static inline int iommu_attach_device(struct iommu_domain *domain,
82 struct device *dev)
83{
84 return -ENODEV;
85}
86
87static inline void iommu_detach_device(struct iommu_domain *domain,
88 struct device *dev)
89{
90}
91
92static inline int iommu_map_range(struct iommu_domain *domain,
93 unsigned long iova, phys_addr_t paddr,
94 size_t size, int prot)
95{
96 return -ENODEV;
97}
98
99static inline void iommu_unmap_range(struct iommu_domain *domain,
100 unsigned long iova, size_t size)
101{
102}
103
104static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
105 unsigned long iova)
106{
107 return 0;
108}
109
110#endif /* CONFIG_IOMMU_API */
111
112#endif /* __LINUX_IOMMU_H */
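
A minimal consumer-side sketch of the new API, assuming a caller that already holds a struct device; example_use_iommu_api and its iova/paddr arguments are placeholders, and error handling is trimmed to the essentials.

	#include <linux/mm.h>
	#include <linux/iommu.h>

	/* Sketch only: map one page at IOVA 'iova' for 'dev', look the
	 * mapping back up, then tear everything down. */
	static int example_use_iommu_api(struct device *dev,
					 unsigned long iova, phys_addr_t paddr)
	{
		struct iommu_domain *domain;
		int r;

		if (!iommu_found())		/* no IOMMU driver registered */
			return -ENODEV;

		domain = iommu_domain_alloc();
		if (!domain)
			return -ENOMEM;

		r = iommu_attach_device(domain, dev);
		if (r)
			goto out_free;

		r = iommu_map_range(domain, iova, paddr, PAGE_SIZE,
				    IOMMU_READ | IOMMU_WRITE);
		if (r)
			goto out_detach;

		WARN_ON(iommu_iova_to_phys(domain, iova) != paddr);

		iommu_unmap_range(domain, iova, PAGE_SIZE);
	out_detach:
		iommu_detach_device(domain, dev);
	out_free:
		iommu_domain_free(domain);
		return r;
	}
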
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index eafabd5c66b2..ec49d0be7f52 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -316,6 +316,7 @@ struct kvm_assigned_dev_kernel {
316#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) 316#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9)
317 unsigned long irq_requested_type; 317 unsigned long irq_requested_type;
318 int irq_source_id; 318 int irq_source_id;
319 int flags;
319 struct pci_dev *dev; 320 struct pci_dev *dev;
320 struct kvm *kvm; 321 struct kvm *kvm;
321}; 322};
@@ -327,13 +328,16 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
327int kvm_request_irq_source_id(struct kvm *kvm); 328int kvm_request_irq_source_id(struct kvm *kvm);
328void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); 329void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
329 330
330#ifdef CONFIG_DMAR 331#ifdef CONFIG_IOMMU_API
331int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, 332int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
332 unsigned long npages); 333 unsigned long npages);
333int kvm_iommu_map_guest(struct kvm *kvm, 334int kvm_iommu_map_guest(struct kvm *kvm);
334 struct kvm_assigned_dev_kernel *assigned_dev);
335int kvm_iommu_unmap_guest(struct kvm *kvm); 335int kvm_iommu_unmap_guest(struct kvm *kvm);
336#else /* CONFIG_DMAR */ 336int kvm_assign_device(struct kvm *kvm,
337 struct kvm_assigned_dev_kernel *assigned_dev);
338int kvm_deassign_device(struct kvm *kvm,
339 struct kvm_assigned_dev_kernel *assigned_dev);
340#else /* CONFIG_IOMMU_API */
337static inline int kvm_iommu_map_pages(struct kvm *kvm, 341static inline int kvm_iommu_map_pages(struct kvm *kvm,
338 gfn_t base_gfn, 342 gfn_t base_gfn,
339 unsigned long npages) 343 unsigned long npages)
@@ -341,9 +345,7 @@ static inline int kvm_iommu_map_pages(struct kvm *kvm,
341 return 0; 345 return 0;
342} 346}
343 347
344static inline int kvm_iommu_map_guest(struct kvm *kvm, 348static inline int kvm_iommu_map_guest(struct kvm *kvm)
345 struct kvm_assigned_dev_kernel
346 *assigned_dev)
347{ 349{
348 return -ENODEV; 350 return -ENODEV;
349} 351}
@@ -352,7 +354,19 @@ static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
352{ 354{
353 return 0; 355 return 0;
354} 356}
355#endif /* CONFIG_DMAR */ 357
358static inline int kvm_assign_device(struct kvm *kvm,
359 struct kvm_assigned_dev_kernel *assigned_dev)
360{
361 return 0;
362}
363
364static inline int kvm_deassign_device(struct kvm *kvm,
365 struct kvm_assigned_dev_kernel *assigned_dev)
366{
367 return 0;
368}
369#endif /* CONFIG_IOMMU_API */
356 370
357static inline void kvm_guest_enter(void) 371static inline void kvm_guest_enter(void)
358{ 372{
diff --git a/virt/kvm/vtd.c b/virt/kvm/iommu.c
index a770874f3a3a..e9693a29d00e 100644
--- a/virt/kvm/vtd.c
+++ b/virt/kvm/iommu.c
@@ -25,6 +25,7 @@
25#include <linux/kvm_host.h> 25#include <linux/kvm_host.h>
26#include <linux/pci.h> 26#include <linux/pci.h>
27#include <linux/dmar.h> 27#include <linux/dmar.h>
28#include <linux/iommu.h>
28#include <linux/intel-iommu.h> 29#include <linux/intel-iommu.h>
29 30
30static int kvm_iommu_unmap_memslots(struct kvm *kvm); 31static int kvm_iommu_unmap_memslots(struct kvm *kvm);
@@ -37,7 +38,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
37 gfn_t gfn = base_gfn; 38 gfn_t gfn = base_gfn;
38 pfn_t pfn; 39 pfn_t pfn;
39 int i, r = 0; 40 int i, r = 0;
40 struct dmar_domain *domain = kvm->arch.intel_iommu_domain; 41 struct iommu_domain *domain = kvm->arch.iommu_domain;
41 42
42 /* check if iommu exists and in use */ 43 /* check if iommu exists and in use */
43 if (!domain) 44 if (!domain)
@@ -45,20 +46,17 @@ int kvm_iommu_map_pages(struct kvm *kvm,
45 46
46 for (i = 0; i < npages; i++) { 47 for (i = 0; i < npages; i++) {
47 /* check if already mapped */ 48 /* check if already mapped */
48 pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, 49 if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
49 gfn_to_gpa(gfn));
50 if (pfn)
51 continue; 50 continue;
52 51
53 pfn = gfn_to_pfn(kvm, gfn); 52 pfn = gfn_to_pfn(kvm, gfn);
54 r = intel_iommu_page_mapping(domain, 53 r = iommu_map_range(domain,
55 gfn_to_gpa(gfn), 54 gfn_to_gpa(gfn),
56 pfn_to_hpa(pfn), 55 pfn_to_hpa(pfn),
57 PAGE_SIZE, 56 PAGE_SIZE,
58 DMA_PTE_READ | 57 IOMMU_READ | IOMMU_WRITE);
59 DMA_PTE_WRITE);
60 if (r) { 58 if (r) {
61 printk(KERN_ERR "kvm_iommu_map_pages:" 59 printk(KERN_ERR "kvm_iommu_map_address:"
62 "iommu failed to map pfn=%lx\n", pfn); 60 "iommu failed to map pfn=%lx\n", pfn);
63 goto unmap_pages; 61 goto unmap_pages;
64 } 62 }
@@ -73,7 +71,7 @@ unmap_pages:
73 71
74static int kvm_iommu_map_memslots(struct kvm *kvm) 72static int kvm_iommu_map_memslots(struct kvm *kvm)
75{ 73{
76 int i, r; 74 int i, r = 0;
77 75
78 down_read(&kvm->slots_lock); 76 down_read(&kvm->slots_lock);
79 for (i = 0; i < kvm->nmemslots; i++) { 77 for (i = 0; i < kvm->nmemslots; i++) {
@@ -86,50 +84,79 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
86 return r; 84 return r;
87} 85}
88 86
89int kvm_iommu_map_guest(struct kvm *kvm, 87int kvm_assign_device(struct kvm *kvm,
90 struct kvm_assigned_dev_kernel *assigned_dev) 88 struct kvm_assigned_dev_kernel *assigned_dev)
91{ 89{
92 struct pci_dev *pdev = NULL; 90 struct pci_dev *pdev = NULL;
91 struct iommu_domain *domain = kvm->arch.iommu_domain;
93 int r; 92 int r;
94 93
95 if (!intel_iommu_found()) { 94 /* check if iommu exists and in use */
96 printk(KERN_ERR "%s: intel iommu not found\n", __func__); 95 if (!domain)
96 return 0;
97
98 pdev = assigned_dev->dev;
99 if (pdev == NULL)
97 return -ENODEV; 100 return -ENODEV;
101
102 r = iommu_attach_device(domain, &pdev->dev);
103 if (r) {
104 printk(KERN_ERR "assign device %x:%x.%x failed",
105 pdev->bus->number,
106 PCI_SLOT(pdev->devfn),
107 PCI_FUNC(pdev->devfn));
108 return r;
98 } 109 }
99 110
100 printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n", 111 printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
101 assigned_dev->host_busnr, 112 assigned_dev->host_busnr,
102 PCI_SLOT(assigned_dev->host_devfn), 113 PCI_SLOT(assigned_dev->host_devfn),
103 PCI_FUNC(assigned_dev->host_devfn)); 114 PCI_FUNC(assigned_dev->host_devfn));
115
116 return 0;
117}
118
119int kvm_deassign_device(struct kvm *kvm,
120 struct kvm_assigned_dev_kernel *assigned_dev)
121{
122 struct iommu_domain *domain = kvm->arch.iommu_domain;
123 struct pci_dev *pdev = NULL;
124
125 /* check if iommu exists and in use */
126 if (!domain)
127 return 0;
104 128
105 pdev = assigned_dev->dev; 129 pdev = assigned_dev->dev;
130 if (pdev == NULL)
131 return -ENODEV;
106 132
107 if (pdev == NULL) { 133 iommu_detach_device(domain, &pdev->dev);
108 if (kvm->arch.intel_iommu_domain) { 134
109 intel_iommu_domain_exit(kvm->arch.intel_iommu_domain); 135 printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
110 kvm->arch.intel_iommu_domain = NULL; 136 assigned_dev->host_busnr,
111 } 137 PCI_SLOT(assigned_dev->host_devfn),
138 PCI_FUNC(assigned_dev->host_devfn));
139
140 return 0;
141}
142
143int kvm_iommu_map_guest(struct kvm *kvm)
144{
145 int r;
146
147 if (!iommu_found()) {
148 printk(KERN_ERR "%s: iommu not found\n", __func__);
112 return -ENODEV; 149 return -ENODEV;
113 } 150 }
114 151
115 kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev); 152 kvm->arch.iommu_domain = iommu_domain_alloc();
116 if (!kvm->arch.intel_iommu_domain) 153 if (!kvm->arch.iommu_domain)
117 return -ENODEV; 154 return -ENOMEM;
118 155
119 r = kvm_iommu_map_memslots(kvm); 156 r = kvm_iommu_map_memslots(kvm);
120 if (r) 157 if (r)
121 goto out_unmap; 158 goto out_unmap;
122 159
123 intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
124 pdev->bus->number, pdev->devfn);
125
126 r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
127 pdev);
128 if (r) {
129 printk(KERN_ERR "Domain context map for %s failed",
130 pci_name(pdev));
131 goto out_unmap;
132 }
133 return 0; 160 return 0;
134 161
135out_unmap: 162out_unmap:
@@ -138,19 +165,26 @@ out_unmap:
138} 165}
139 166
140static void kvm_iommu_put_pages(struct kvm *kvm, 167static void kvm_iommu_put_pages(struct kvm *kvm,
141 gfn_t base_gfn, unsigned long npages) 168 gfn_t base_gfn, unsigned long npages)
142{ 169{
143 gfn_t gfn = base_gfn; 170 gfn_t gfn = base_gfn;
144 pfn_t pfn; 171 pfn_t pfn;
145 struct dmar_domain *domain = kvm->arch.intel_iommu_domain; 172 struct iommu_domain *domain = kvm->arch.iommu_domain;
146 int i; 173 unsigned long i;
174 u64 phys;
175
176 /* check if iommu exists and in use */
177 if (!domain)
178 return;
147 179
148 for (i = 0; i < npages; i++) { 180 for (i = 0; i < npages; i++) {
149 pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, 181 phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
150 gfn_to_gpa(gfn)); 182 pfn = phys >> PAGE_SHIFT;
151 kvm_release_pfn_clean(pfn); 183 kvm_release_pfn_clean(pfn);
152 gfn++; 184 gfn++;
153 } 185 }
186
187 iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages);
154} 188}
155 189
156static int kvm_iommu_unmap_memslots(struct kvm *kvm) 190static int kvm_iommu_unmap_memslots(struct kvm *kvm)
@@ -168,24 +202,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
168 202
169int kvm_iommu_unmap_guest(struct kvm *kvm) 203int kvm_iommu_unmap_guest(struct kvm *kvm)
170{ 204{
171 struct kvm_assigned_dev_kernel *entry; 205 struct iommu_domain *domain = kvm->arch.iommu_domain;
172 struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
173 206
174 /* check if iommu exists and in use */ 207 /* check if iommu exists and in use */
175 if (!domain) 208 if (!domain)
176 return 0; 209 return 0;
177 210
178 list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
179 printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
180 entry->host_busnr,
181 PCI_SLOT(entry->host_devfn),
182 PCI_FUNC(entry->host_devfn));
183
184 /* detach kvm dmar domain */
185 intel_iommu_detach_dev(domain, entry->host_busnr,
186 entry->host_devfn);
187 }
188 kvm_iommu_unmap_memslots(kvm); 211 kvm_iommu_unmap_memslots(kvm);
189 intel_iommu_domain_exit(domain); 212 iommu_domain_free(domain);
190 return 0; 213 return 0;
191} 214}
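
With the rename from vtd.c to iommu.c, the old per-device kvm_iommu_map_guest() is split into a per-VM step and per-device steps. A rough sketch of the resulting lifecycle, as a hypothetical wrapper with error handling omitted:

	#include <linux/kvm_host.h>

	/* Sketch only: the order in which the helpers above are now used. */
	static void example_assigned_device_lifecycle(struct kvm *kvm,
						      struct kvm_assigned_dev_kernel *adev)
	{
		kvm_iommu_map_guest(kvm);	/* once per VM: allocate the domain, map all memslots */
		kvm_assign_device(kvm, adev);	/* per device: attach it to the VM's domain */

		/* ... guest runs with direct access to the device ... */

		kvm_deassign_device(kvm, adev);	/* per device: detach it from the domain */
		kvm_iommu_unmap_guest(kvm);	/* once per VM: unmap memslots, free the domain */
	}
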
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fc6127cbea1f..3a5a08298aab 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -496,6 +496,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
496 match->assigned_dev_id = assigned_dev->assigned_dev_id; 496 match->assigned_dev_id = assigned_dev->assigned_dev_id;
497 match->host_busnr = assigned_dev->busnr; 497 match->host_busnr = assigned_dev->busnr;
498 match->host_devfn = assigned_dev->devfn; 498 match->host_devfn = assigned_dev->devfn;
499 match->flags = assigned_dev->flags;
499 match->dev = dev; 500 match->dev = dev;
500 match->irq_source_id = -1; 501 match->irq_source_id = -1;
501 match->kvm = kvm; 502 match->kvm = kvm;
@@ -503,7 +504,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
503 list_add(&match->list, &kvm->arch.assigned_dev_head); 504 list_add(&match->list, &kvm->arch.assigned_dev_head);
504 505
505 if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { 506 if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
506 r = kvm_iommu_map_guest(kvm, match); 507 if (!kvm->arch.iommu_domain) {
508 r = kvm_iommu_map_guest(kvm);
509 if (r)
510 goto out_list_del;
511 }
512 r = kvm_assign_device(kvm, match);
507 if (r) 513 if (r)
508 goto out_list_del; 514 goto out_list_del;
509 } 515 }
@@ -525,6 +531,35 @@ out_free:
525} 531}
526#endif 532#endif
527 533
534#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
535static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
536 struct kvm_assigned_pci_dev *assigned_dev)
537{
538 int r = 0;
539 struct kvm_assigned_dev_kernel *match;
540
541 mutex_lock(&kvm->lock);
542
543 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
544 assigned_dev->assigned_dev_id);
545 if (!match) {
546 printk(KERN_INFO "%s: device hasn't been assigned before, "
547 "so cannot be deassigned\n", __func__);
548 r = -EINVAL;
549 goto out;
550 }
551
552 if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
553 kvm_deassign_device(kvm, match);
554
555 kvm_free_assigned_device(kvm, match);
556
557out:
558 mutex_unlock(&kvm->lock);
559 return r;
560}
561#endif
562
528static inline int valid_vcpu(int n) 563static inline int valid_vcpu(int n)
529{ 564{
530 return likely(n >= 0 && n < KVM_MAX_VCPUS); 565 return likely(n >= 0 && n < KVM_MAX_VCPUS);
@@ -1858,6 +1893,19 @@ static long kvm_vm_ioctl(struct file *filp,
1858 break; 1893 break;
1859 } 1894 }
1860#endif 1895#endif
1896#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
1897 case KVM_DEASSIGN_PCI_DEVICE: {
1898 struct kvm_assigned_pci_dev assigned_dev;
1899
1900 r = -EFAULT;
1901 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
1902 goto out;
1903 r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
1904 if (r)
1905 goto out;
1906 break;
1907 }
1908#endif
1861 default: 1909 default:
1862 r = kvm_arch_vm_ioctl(filp, ioctl, arg); 1910 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
1863 } 1911 }
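
From userspace, the new deassignment path is reached through the VM file descriptor. A hedged sketch of the call; deassign_pci_device, vm_fd and dev_id are hypothetical names, and the struct layout is assumed to match the existing device-assignment ABI exported in <linux/kvm.h>.

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Sketch only: detach a previously assigned PCI device from a VM.
	 * 'vm_fd' is the fd returned by KVM_CREATE_VM; 'dev_id' is the same
	 * assigned_dev_id that was used when the device was assigned. */
	static int deassign_pci_device(int vm_fd, __u32 dev_id)
	{
		struct kvm_assigned_pci_dev dev;

		memset(&dev, 0, sizeof(dev));
		dev.assigned_dev_id = dev_id;
		dev.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU;	/* also detach it from the IOMMU domain */

		return ioctl(vm_fd, KVM_DEASSIGN_PCI_DEVICE, &dev);
	}
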