author	Linus Torvalds <torvalds@linux-foundation.org>	2009-01-03 15:03:52 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-01-03 15:03:52 -0500
commit	269b012321f2f1f8e4648c43a93bf432b42c6668 (patch)
tree	e30ec565db50d4aec570cb52e9b29bd584beb2fd /arch
parent	f60a0a79846abed04ad5abddb5dafd14b66e1ab0 (diff)
parent	065a6d68c71af2a3bdd080fa5aa353b76eede8f5 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu: (89 commits)
  AMD IOMMU: remove now unnecessary #ifdefs
  AMD IOMMU: prealloc_protection_domains should be static
  kvm/iommu: fix compile warning
  AMD IOMMU: add statistics about total number of map requests
  AMD IOMMU: add statistics about allocated io memory
  AMD IOMMU: add stats counter for domain tlb flushes
  AMD IOMMU: add stats counter for single iommu domain tlb flushes
  AMD IOMMU: add stats counter for cross-page request
  AMD IOMMU: add stats counter for free_coherent requests
  AMD IOMMU: add stats counter for alloc_coherent requests
  AMD IOMMU: add stats counter for unmap_sg requests
  AMD IOMMU: add stats counter for map_sg requests
  AMD IOMMU: add stats counter for unmap_single requests
  AMD IOMMU: add stats counter for map_single requests
  AMD IOMMU: add stats counter for completion wait events
  AMD IOMMU: add init code for statistic collection
  AMD IOMMU: add necessary header defines for stats counting
  AMD IOMMU: add Kconfig entry for statistic collection code
  AMD IOMMU: use dev_name in iommu_enable function
  AMD IOMMU: use calc_devid in prealloc_protection_domains
  ...
Diffstat (limited to 'arch')
-rw-r--r--  arch/ia64/Kconfig                       |   3
-rw-r--r--  arch/ia64/include/asm/kvm_host.h        |   2
-rw-r--r--  arch/ia64/kvm/Makefile                  |   4
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c                |   3
-rw-r--r--  arch/x86/Kconfig                        |  13
-rw-r--r--  arch/x86/include/asm/amd_iommu_types.h  |  61
-rw-r--r--  arch/x86/include/asm/kvm_host.h         |   2
-rw-r--r--  arch/x86/kernel/amd_iommu.c             | 664
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c        |  15
-rw-r--r--  arch/x86/kvm/Makefile                   |   4
-rw-r--r--  arch/x86/kvm/x86.c                      |   3
11 files changed, 680 insertions, 94 deletions
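
Several of the commits above add statistics counters that the driver exports through debugfs: amd_iommu_stats_init() (in the arch/x86/kernel/amd_iommu.c hunks below) creates an "amd-iommu" directory containing one u64 file per DECLARE_STATS_COUNTER() name, plus "isolation" and "fullflush" booleans. As a hedged illustration only — assuming CONFIG_AMD_IOMMU_STATS=y and debugfs mounted at the conventional /sys/kernel/debug, neither of which this merge guarantees — a small userspace reader for a few of those counters could look like this:

/*
 * Hypothetical sketch: dump a few AMD IOMMU debugfs statistics counters.
 * File names follow the DECLARE_STATS_COUNTER() names used in
 * arch/x86/kernel/amd_iommu.c; the debugfs mount point is an assumption.
 */
#include <stdio.h>

int main(void)
{
	static const char *counters[] = {
		"total_map_requests", "cross_page", "alloced_io_mem",
		"cnt_map_single", "cnt_unmap_single", "compl_wait",
	};
	unsigned long long val;
	char path[128];
	unsigned int i;

	for (i = 0; i < sizeof(counters) / sizeof(counters[0]); i++) {
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/kernel/debug/amd-iommu/%s", counters[i]);
		f = fopen(path, "r");
		if (!f)
			continue; /* stats not compiled in or no AMD IOMMU */
		if (fscanf(f, "%llu", &val) == 1)
			printf("%-20s %llu\n", counters[i], val);
		fclose(f);
	}
	return 0;
}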
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 7fa8f615ba6e..3d31636cbafb 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -687,3 +687,6 @@ config IRQ_PER_CPU
 
 config IOMMU_HELPER
 	def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
+
+config IOMMU_API
+	def_bool (DMAR)
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 0560f3fae538..348663661659 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -467,7 +467,7 @@ struct kvm_arch {
 	struct kvm_sal_data rdv_sal_data;
 
 	struct list_head assigned_dev_head;
-	struct dmar_domain *intel_iommu_domain;
+	struct iommu_domain *iommu_domain;
 	struct hlist_head irq_ack_notifier_list;
 
 	unsigned long irq_sources_bitmap;
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 76464dc312e6..0bb99b732908 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -51,8 +51,8 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 		coalesced_mmio.o irq_comm.o)
 
-ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+ifeq ($(CONFIG_IOMMU_API),y)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0f5ebd948437..4e586f6110aa 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -31,6 +31,7 @@
 #include <linux/bitops.h>
 #include <linux/hrtimer.h>
 #include <linux/uaccess.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 #include <asm/pgtable.h>
@@ -188,7 +189,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
 	case KVM_CAP_IOMMU:
-		r = intel_iommu_found();
+		r = iommu_found();
 		break;
 	default:
 		r = 0;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 249d1e0824b5..862adb9bf0d4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -586,6 +586,16 @@ config AMD_IOMMU
 	  your BIOS for an option to enable it or if you have an IVRS ACPI
 	  table.
 
+config AMD_IOMMU_STATS
+	bool "Export AMD IOMMU statistics to debugfs"
+	depends on AMD_IOMMU
+	select DEBUG_FS
+	help
+	  This option enables code in the AMD IOMMU driver to collect various
+	  statistics about whats happening in the driver and exports that
+	  information to userspace via debugfs.
+	  If unsure, say N.
+
 # need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
 	def_bool y if X86_64
@@ -599,6 +609,9 @@ config SWIOTLB
 config IOMMU_HELPER
 	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
 
+config IOMMU_API
+	def_bool (AMD_IOMMU || DMAR)
+
 config MAXSMP
 	bool "Configure Maximum number of SMP Processors and NUMA Nodes"
 	depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index ac302a2fa339..95c8cd9d22b5 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -190,16 +190,23 @@
 /* FIXME: move this macro to <linux/pci.h> */
 #define PCI_BUS(x) (((x) >> 8) & 0xff)
 
+/* Protection domain flags */
+#define PD_DMA_OPS_MASK		(1UL << 0) /* domain used for dma_ops */
+#define PD_DEFAULT_MASK		(1UL << 1) /* domain is a default dma_ops
+					      domain for an IOMMU */
+
 /*
  * This structure contains generic data for IOMMU protection domains
  * independent of their use.
  */
 struct protection_domain {
 	spinlock_t lock;	/* mostly used to lock the page table*/
 	u16 id;			/* the domain id written to the device table */
 	int mode;		/* paging mode (0-6 levels) */
 	u64 *pt_root;		/* page table root pointer */
-	void *priv;		/* private data */
+	unsigned long flags;	/* flags to find out type of domain */
+	unsigned dev_cnt;	/* devices assigned to this domain */
+	void *priv;		/* private data */
 };
 
 /*
@@ -295,7 +302,7 @@ struct amd_iommu {
 	bool int_enabled;
 
 	/* if one, we need to send a completion wait command */
-	int need_sync;
+	bool need_sync;
 
 	/* default dma_ops domain for that IOMMU */
 	struct dma_ops_domain *default_dom;
@@ -374,7 +381,7 @@ extern struct protection_domain **amd_iommu_pd_table;
 extern unsigned long *amd_iommu_pd_alloc_bitmap;
 
 /* will be 1 if device isolation is enabled */
-extern int amd_iommu_isolate;
+extern bool amd_iommu_isolate;
 
 /*
  * If true, the addresses will be flushed on unmap time, not when
@@ -382,18 +389,6 @@ extern int amd_iommu_isolate;
  */
 extern bool amd_iommu_unmap_flush;
 
-/* takes a PCI device id and prints it out in a readable form */
-static inline void print_devid(u16 devid, int nl)
-{
-	int bus = devid >> 8;
-	int dev = devid >> 3 & 0x1f;
-	int fn = devid & 0x07;
-
-	printk("%02x:%02x.%x", bus, dev, fn);
-	if (nl)
-		printk("\n");
-}
-
 /* takes bus and device/function and returns the device id
  * FIXME: should that be in generic PCI code? */
 static inline u16 calc_devid(u8 bus, u8 devfn)
@@ -401,4 +396,32 @@ static inline u16 calc_devid(u8 bus, u8 devfn)
 	return (((u16)bus) << 8) | devfn;
 }
 
+#ifdef CONFIG_AMD_IOMMU_STATS
+
+struct __iommu_counter {
+	char *name;
+	struct dentry *dent;
+	u64 value;
+};
+
+#define DECLARE_STATS_COUNTER(nm) \
+	static struct __iommu_counter nm = {	\
+		.name = #nm,			\
+	}
+
+#define INC_STATS_COUNTER(name)		name.value += 1
+#define ADD_STATS_COUNTER(name, x)	name.value += (x)
+#define SUB_STATS_COUNTER(name, x)	name.value -= (x)
+
+#else /* CONFIG_AMD_IOMMU_STATS */
+
+#define DECLARE_STATS_COUNTER(name)
+#define INC_STATS_COUNTER(name)
+#define ADD_STATS_COUNTER(name, x)
+#define SUB_STATS_COUNTER(name, x)
+
+static inline void amd_iommu_stats_init(void) { }
+
+#endif /* CONFIG_AMD_IOMMU_STATS */
+
 #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 97215a458e5f..730843d1d2fb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -360,7 +360,7 @@ struct kvm_arch{
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
 	struct list_head oos_global_pages;
-	struct dmar_domain *intel_iommu_domain;
+	struct iommu_domain *iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 658e29e0f49b..5113c080f0c4 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -20,8 +20,12 @@
 #include <linux/pci.h>
 #include <linux/gfp.h>
 #include <linux/bitops.h>
+#include <linux/debugfs.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
+#ifdef CONFIG_IOMMU_API
+#include <linux/iommu.h>
+#endif
 #include <asm/proto.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
@@ -38,6 +42,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 static LIST_HEAD(iommu_pd_list);
 static DEFINE_SPINLOCK(iommu_pd_list_lock);
 
+#ifdef CONFIG_IOMMU_API
+static struct iommu_ops amd_iommu_ops;
+#endif
+
 /*
  * general struct to manage commands send to an IOMMU
  */
@@ -47,6 +55,68 @@ struct iommu_cmd {
 
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 			     struct unity_map_entry *e);
+static struct dma_ops_domain *find_protection_domain(u16 devid);
+
+
+#ifdef CONFIG_AMD_IOMMU_STATS
+
+/*
+ * Initialization code for statistics collection
+ */
+
+DECLARE_STATS_COUNTER(compl_wait);
+DECLARE_STATS_COUNTER(cnt_map_single);
+DECLARE_STATS_COUNTER(cnt_unmap_single);
+DECLARE_STATS_COUNTER(cnt_map_sg);
+DECLARE_STATS_COUNTER(cnt_unmap_sg);
+DECLARE_STATS_COUNTER(cnt_alloc_coherent);
+DECLARE_STATS_COUNTER(cnt_free_coherent);
+DECLARE_STATS_COUNTER(cross_page);
+DECLARE_STATS_COUNTER(domain_flush_single);
+DECLARE_STATS_COUNTER(domain_flush_all);
+DECLARE_STATS_COUNTER(alloced_io_mem);
+DECLARE_STATS_COUNTER(total_map_requests);
+
+static struct dentry *stats_dir;
+static struct dentry *de_isolate;
+static struct dentry *de_fflush;
+
+static void amd_iommu_stats_add(struct __iommu_counter *cnt)
+{
+	if (stats_dir == NULL)
+		return;
+
+	cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
+				       &cnt->value);
+}
+
+static void amd_iommu_stats_init(void)
+{
+	stats_dir = debugfs_create_dir("amd-iommu", NULL);
+	if (stats_dir == NULL)
+		return;
+
+	de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
+					 (u32 *)&amd_iommu_isolate);
+
+	de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
+					(u32 *)&amd_iommu_unmap_flush);
+
+	amd_iommu_stats_add(&compl_wait);
+	amd_iommu_stats_add(&cnt_map_single);
+	amd_iommu_stats_add(&cnt_unmap_single);
+	amd_iommu_stats_add(&cnt_map_sg);
+	amd_iommu_stats_add(&cnt_unmap_sg);
+	amd_iommu_stats_add(&cnt_alloc_coherent);
+	amd_iommu_stats_add(&cnt_free_coherent);
+	amd_iommu_stats_add(&cross_page);
+	amd_iommu_stats_add(&domain_flush_single);
+	amd_iommu_stats_add(&domain_flush_all);
+	amd_iommu_stats_add(&alloced_io_mem);
+	amd_iommu_stats_add(&total_map_requests);
+}
+
+#endif
 
 /* returns !0 if the IOMMU is caching non-present entries in its TLB */
 static int iommu_has_npcache(struct amd_iommu *iommu)
@@ -189,13 +259,55 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 	spin_lock_irqsave(&iommu->lock, flags);
 	ret = __iommu_queue_command(iommu, cmd);
 	if (!ret)
-		iommu->need_sync = 1;
+		iommu->need_sync = true;
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return ret;
 }
 
 /*
+ * This function waits until an IOMMU has completed a completion
+ * wait command
+ */
+static void __iommu_wait_for_completion(struct amd_iommu *iommu)
+{
+	int ready = 0;
+	unsigned status = 0;
+	unsigned long i = 0;
+
+	INC_STATS_COUNTER(compl_wait);
+
+	while (!ready && (i < EXIT_LOOP_COUNT)) {
+		++i;
+		/* wait for the bit to become one */
+		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+		ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
+	}
+
+	/* set bit back to zero */
+	status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
+	writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+	if (unlikely(i == EXIT_LOOP_COUNT))
+		panic("AMD IOMMU: Completion wait loop failed\n");
+}
+
+/*
+ * This function queues a completion wait command into the command
+ * buffer of an IOMMU
+ */
+static int __iommu_completion_wait(struct amd_iommu *iommu)
+{
+	struct iommu_cmd cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
+	CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+
+	return __iommu_queue_command(iommu, &cmd);
+}
+
+/*
  * This function is called whenever we need to ensure that the IOMMU has
  * completed execution of all commands we sent. It sends a
  * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
@@ -204,40 +316,22 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
  */
 static int iommu_completion_wait(struct amd_iommu *iommu)
 {
-	int ret = 0, ready = 0;
-	unsigned status = 0;
-	struct iommu_cmd cmd;
-	unsigned long flags, i = 0;
-
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
-	CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+	int ret = 0;
+	unsigned long flags;
 
 	spin_lock_irqsave(&iommu->lock, flags);
 
 	if (!iommu->need_sync)
 		goto out;
 
-	iommu->need_sync = 0;
+	ret = __iommu_completion_wait(iommu);
 
-	ret = __iommu_queue_command(iommu, &cmd);
+	iommu->need_sync = false;
 
 	if (ret)
 		goto out;
 
-	while (!ready && (i < EXIT_LOOP_COUNT)) {
-		++i;
-		/* wait for the bit to become one */
-		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
-		ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
-	}
-
-	/* set bit back to zero */
-	status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
-	writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
-	if (unlikely(i == EXIT_LOOP_COUNT))
-		panic("AMD IOMMU: Completion wait loop failed\n");
+	__iommu_wait_for_completion(iommu);
 
 out:
 	spin_unlock_irqrestore(&iommu->lock, flags);
@@ -264,6 +358,21 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 	return ret;
 }
 
+static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
+					  u16 domid, int pde, int s)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	address &= PAGE_MASK;
+	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
+	cmd->data[1] |= domid;
+	cmd->data[2] = lower_32_bits(address);
+	cmd->data[3] = upper_32_bits(address);
+	if (s) /* size bit - we flush more than one 4kb page */
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+}
+
 /*
  * Generic command send function for invalidaing TLB entries
  */
@@ -273,16 +382,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 	struct iommu_cmd cmd;
 	int ret;
 
-	memset(&cmd, 0, sizeof(cmd));
-	address &= PAGE_MASK;
-	CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
-	cmd.data[1] |= domid;
-	cmd.data[2] = lower_32_bits(address);
-	cmd.data[3] = upper_32_bits(address);
-	if (s) /* size bit - we flush more than one 4kb page */
-		cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
-	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
-		cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+	__iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
 
 	ret = iommu_queue_command(iommu, &cmd);
 
@@ -321,9 +421,35 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
 {
 	u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
 
+	INC_STATS_COUNTER(domain_flush_single);
+
 	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
 }
 
+/*
+ * This function is used to flush the IO/TLB for a given protection domain
+ * on every IOMMU in the system
+ */
+static void iommu_flush_domain(u16 domid)
+{
+	unsigned long flags;
+	struct amd_iommu *iommu;
+	struct iommu_cmd cmd;
+
+	INC_STATS_COUNTER(domain_flush_all);
+
+	__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+				      domid, 1, 1);
+
+	list_for_each_entry(iommu, &amd_iommu_list, list) {
+		spin_lock_irqsave(&iommu->lock, flags);
+		__iommu_queue_command(iommu, &cmd);
+		__iommu_completion_wait(iommu);
+		__iommu_wait_for_completion(iommu);
+		spin_unlock_irqrestore(&iommu->lock, flags);
+	}
+}
+
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
@@ -338,10 +464,10 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
  * supporting all features of AMD IOMMU page tables like level skipping
  * and full 64 bit address spaces.
  */
-static int iommu_map(struct protection_domain *dom,
-		     unsigned long bus_addr,
-		     unsigned long phys_addr,
-		     int prot)
+static int iommu_map_page(struct protection_domain *dom,
+			  unsigned long bus_addr,
+			  unsigned long phys_addr,
+			  int prot)
 {
 	u64 __pte, *pte, *page;
 
@@ -388,6 +514,28 @@ static int iommu_map(struct protection_domain *dom,
 	return 0;
 }
 
+static void iommu_unmap_page(struct protection_domain *dom,
+			     unsigned long bus_addr)
+{
+	u64 *pte;
+
+	pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
+
+	*pte = 0;
+}
+
 /*
  * This function checks if a specific unity mapping entry is needed for
  * this specific IOMMU.
@@ -440,7 +588,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 
 	for (addr = e->address_start; addr < e->address_end;
 	     addr += PAGE_SIZE) {
-		ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
+		ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
 		if (ret)
 			return ret;
 		/*
@@ -571,6 +719,16 @@ static u16 domain_id_alloc(void)
 	return id;
 }
 
+static void domain_id_free(int id)
+{
+	unsigned long flags;
+
+	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	if (id > 0 && id < MAX_DOMAIN_ID)
+		__clear_bit(id, amd_iommu_pd_alloc_bitmap);
+	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
 /*
  * Used to reserve address ranges in the aperture (e.g. for exclusion
  * ranges.
@@ -587,12 +745,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
 	iommu_area_reserve(dom->bitmap, start_page, pages);
 }
 
-static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
+static void free_pagetable(struct protection_domain *domain)
 {
 	int i, j;
 	u64 *p1, *p2, *p3;
 
-	p1 = dma_dom->domain.pt_root;
+	p1 = domain->pt_root;
 
 	if (!p1)
 		return;
@@ -613,6 +771,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
 	}
 
 	free_page((unsigned long)p1);
+
+	domain->pt_root = NULL;
 }
 
 /*
@@ -624,7 +784,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 	if (!dom)
 		return;
 
-	dma_ops_free_pagetable(dom);
+	free_pagetable(&dom->domain);
 
 	kfree(dom->pte_pages);
 
@@ -663,6 +823,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 		goto free_dma_dom;
 	dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
 	dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+	dma_dom->domain.flags = PD_DMA_OPS_MASK;
 	dma_dom->domain.priv = dma_dom;
 	if (!dma_dom->domain.pt_root)
 		goto free_dma_dom;
@@ -725,6 +886,15 @@ free_dma_dom:
 }
 
 /*
+ * little helper function to check whether a given protection domain is a
+ * dma_ops domain
+ */
+static bool dma_ops_domain(struct protection_domain *domain)
+{
+	return domain->flags & PD_DMA_OPS_MASK;
+}
+
+/*
  * Find out the protection domain structure for a given PCI device. This
  * will give us the pointer to the page table root for example.
  */
@@ -744,14 +914,15 @@ static struct protection_domain *domain_for_device(u16 devid)
  * If a device is not yet associated with a domain, this function does
  * assigns it visible for the hardware
  */
-static void set_device_domain(struct amd_iommu *iommu,
-			      struct protection_domain *domain,
-			      u16 devid)
+static void attach_device(struct amd_iommu *iommu,
+			  struct protection_domain *domain,
+			  u16 devid)
 {
 	unsigned long flags;
-
 	u64 pte_root = virt_to_phys(domain->pt_root);
 
+	domain->dev_cnt += 1;
+
 	pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
 		    << DEV_ENTRY_MODE_SHIFT;
 	pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
@@ -767,6 +938,116 @@ static void set_device_domain(struct amd_iommu *iommu,
 	iommu_queue_inv_dev_entry(iommu, devid);
 }
 
+/*
+ * Removes a device from a protection domain (unlocked)
+ */
+static void __detach_device(struct protection_domain *domain, u16 devid)
+{
+
+	/* lock domain */
+	spin_lock(&domain->lock);
+
+	/* remove domain from the lookup table */
+	amd_iommu_pd_table[devid] = NULL;
+
+	/* remove entry from the device table seen by the hardware */
+	amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
+	amd_iommu_dev_table[devid].data[1] = 0;
+	amd_iommu_dev_table[devid].data[2] = 0;
+
+	/* decrease reference counter */
+	domain->dev_cnt -= 1;
+
+	/* ready */
+	spin_unlock(&domain->lock);
+}
+
+/*
+ * Removes a device from a protection domain (with devtable_lock held)
+ */
+static void detach_device(struct protection_domain *domain, u16 devid)
+{
+	unsigned long flags;
+
+	/* lock device table */
+	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	__detach_device(domain, devid);
+	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
+static int device_change_notifier(struct notifier_block *nb,
+				  unsigned long action, void *data)
+{
+	struct device *dev = data;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
+	struct protection_domain *domain;
+	struct dma_ops_domain *dma_domain;
+	struct amd_iommu *iommu;
+	int order = amd_iommu_aperture_order;
+	unsigned long flags;
+
+	if (devid > amd_iommu_last_bdf)
+		goto out;
+
+	devid = amd_iommu_alias_table[devid];
+
+	iommu = amd_iommu_rlookup_table[devid];
+	if (iommu == NULL)
+		goto out;
+
+	domain = domain_for_device(devid);
+
+	if (domain && !dma_ops_domain(domain))
+		WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
+			  "to a non-dma-ops domain\n", dev_name(dev));
+
+	switch (action) {
+	case BUS_NOTIFY_BOUND_DRIVER:
+		if (domain)
+			goto out;
+		dma_domain = find_protection_domain(devid);
+		if (!dma_domain)
+			dma_domain = iommu->default_dom;
+		attach_device(iommu, &dma_domain->domain, devid);
+		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
+		       "device %s\n", dma_domain->domain.id, dev_name(dev));
+		break;
+	case BUS_NOTIFY_UNBIND_DRIVER:
+		if (!domain)
+			goto out;
+		detach_device(domain, devid);
+		break;
+	case BUS_NOTIFY_ADD_DEVICE:
+		/* allocate a protection domain if a device is added */
+		dma_domain = find_protection_domain(devid);
+		if (dma_domain)
+			goto out;
+		dma_domain = dma_ops_domain_alloc(iommu, order);
+		if (!dma_domain)
+			goto out;
+		dma_domain->target_dev = devid;
+
+		spin_lock_irqsave(&iommu_pd_list_lock, flags);
+		list_add_tail(&dma_domain->list, &iommu_pd_list);
+		spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
+
+		break;
+	default:
+		goto out;
+	}
+
+	iommu_queue_inv_dev_entry(iommu, devid);
+	iommu_completion_wait(iommu);
+
+out:
+	return 0;
+}
+
+struct notifier_block device_nb = {
+	.notifier_call = device_change_notifier,
+};
+
 /*****************************************************************************
  *
  * The next functions belong to the dma_ops mapping/unmapping code.
@@ -802,7 +1083,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid)
 	list_for_each_entry(entry, &iommu_pd_list, list) {
 		if (entry->target_dev == devid) {
 			ret = entry;
-			list_del(&ret->list);
 			break;
 		}
 	}
@@ -853,14 +1133,13 @@ static int get_device_resources(struct device *dev,
 		if (!dma_dom)
 			dma_dom = (*iommu)->default_dom;
 		*domain = &dma_dom->domain;
-		set_device_domain(*iommu, *domain, *bdf);
+		attach_device(*iommu, *domain, *bdf);
 		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-		       "device ", (*domain)->id);
-		print_devid(_bdf, 1);
+		       "device %s\n", (*domain)->id, dev_name(dev));
 	}
 
 	if (domain_for_device(_bdf) == NULL)
-		set_device_domain(*iommu, *domain, _bdf);
+		attach_device(*iommu, *domain, _bdf);
 
 	return 1;
 }
@@ -946,6 +1225,11 @@ static dma_addr_t __map_single(struct device *dev,
 	pages = iommu_num_pages(paddr, size, PAGE_SIZE);
 	paddr &= PAGE_MASK;
 
+	INC_STATS_COUNTER(total_map_requests);
+
+	if (pages > 1)
+		INC_STATS_COUNTER(cross_page);
+
 	if (align)
 		align_mask = (1UL << get_order(size)) - 1;
 
@@ -962,6 +1246,8 @@ static dma_addr_t __map_single(struct device *dev,
 	}
 	address += offset;
 
+	ADD_STATS_COUNTER(alloced_io_mem, size);
+
 	if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
 		iommu_flush_tlb(iommu, dma_dom->domain.id);
 		dma_dom->need_flush = false;
@@ -998,6 +1284,8 @@ static void __unmap_single(struct amd_iommu *iommu,
 		start += PAGE_SIZE;
 	}
 
+	SUB_STATS_COUNTER(alloced_io_mem, size);
+
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
 	if (amd_iommu_unmap_flush || dma_dom->need_flush) {
@@ -1019,6 +1307,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 	dma_addr_t addr;
 	u64 dma_mask;
 
+	INC_STATS_COUNTER(cnt_map_single);
+
 	if (!check_device(dev))
 		return bad_dma_address;
 
@@ -1030,6 +1320,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 		/* device not handled by any AMD IOMMU */
 		return (dma_addr_t)paddr;
 
+	if (!dma_ops_domain(domain))
+		return bad_dma_address;
+
 	spin_lock_irqsave(&domain->lock, flags);
 	addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
 			    dma_mask);
@@ -1055,11 +1348,16 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
 	struct protection_domain *domain;
 	u16 devid;
 
+	INC_STATS_COUNTER(cnt_unmap_single);
+
 	if (!check_device(dev) ||
 	    !get_device_resources(dev, &iommu, &domain, &devid))
 		/* device not handled by any AMD IOMMU */
 		return;
 
+	if (!dma_ops_domain(domain))
+		return;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	__unmap_single(iommu, domain->priv, dma_addr, size, dir);
@@ -1104,6 +1402,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 	int mapped_elems = 0;
 	u64 dma_mask;
 
+	INC_STATS_COUNTER(cnt_map_sg);
+
 	if (!check_device(dev))
 		return 0;
 
@@ -1114,6 +1414,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 	if (!iommu || !domain)
 		return map_sg_no_iommu(dev, sglist, nelems, dir);
 
+	if (!dma_ops_domain(domain))
+		return 0;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	for_each_sg(sglist, s, nelems, i) {
@@ -1163,10 +1466,15 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 	u16 devid;
 	int i;
 
+	INC_STATS_COUNTER(cnt_unmap_sg);
+
 	if (!check_device(dev) ||
 	    !get_device_resources(dev, &iommu, &domain, &devid))
 		return;
 
+	if (!dma_ops_domain(domain))
+		return;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	for_each_sg(sglist, s, nelems, i) {
@@ -1194,6 +1502,8 @@ static void *alloc_coherent(struct device *dev, size_t size,
 	phys_addr_t paddr;
 	u64 dma_mask = dev->coherent_dma_mask;
 
+	INC_STATS_COUNTER(cnt_alloc_coherent);
+
 	if (!check_device(dev))
 		return NULL;
 
@@ -1212,6 +1522,9 @@ static void *alloc_coherent(struct device *dev, size_t size,
 		return virt_addr;
 	}
 
+	if (!dma_ops_domain(domain))
+		goto out_free;
+
 	if (!dma_mask)
 		dma_mask = *dev->dma_mask;
 
@@ -1220,18 +1533,20 @@ static void *alloc_coherent(struct device *dev, size_t size,
 	*dma_addr = __map_single(dev, iommu, domain->priv, paddr,
 				 size, DMA_BIDIRECTIONAL, true, dma_mask);
 
-	if (*dma_addr == bad_dma_address) {
-		free_pages((unsigned long)virt_addr, get_order(size));
-		virt_addr = NULL;
-		goto out;
-	}
+	if (*dma_addr == bad_dma_address)
+		goto out_free;
 
 	iommu_completion_wait(iommu);
 
-out:
 	spin_unlock_irqrestore(&domain->lock, flags);
 
 	return virt_addr;
+
+out_free:
+
+	free_pages((unsigned long)virt_addr, get_order(size));
+
+	return NULL;
 }
 
 /*
@@ -1245,6 +1560,8 @@ static void free_coherent(struct device *dev, size_t size,
 	struct protection_domain *domain;
 	u16 devid;
 
+	INC_STATS_COUNTER(cnt_free_coherent);
+
 	if (!check_device(dev))
 		return;
 
@@ -1253,6 +1570,9 @@ static void free_coherent(struct device *dev, size_t size,
 	if (!iommu || !domain)
 		goto free_mem;
 
+	if (!dma_ops_domain(domain))
+		goto free_mem;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
@@ -1305,7 +1625,7 @@ static void prealloc_protection_domains(void)
 	u16 devid;
 
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
-		devid = (dev->bus->number << 8) | dev->devfn;
+		devid = calc_devid(dev->bus->number, dev->devfn);
 		if (devid > amd_iommu_last_bdf)
 			continue;
 		devid = amd_iommu_alias_table[devid];
@@ -1352,6 +1672,7 @@ int __init amd_iommu_init_dma_ops(void)
 		iommu->default_dom = dma_ops_domain_alloc(iommu, order);
 		if (iommu->default_dom == NULL)
 			return -ENOMEM;
+		iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
 		ret = iommu_init_unity_mappings(iommu);
 		if (ret)
 			goto free_domains;
@@ -1375,6 +1696,12 @@ int __init amd_iommu_init_dma_ops(void)
 	/* Make the driver finally visible to the drivers */
 	dma_ops = &amd_iommu_dma_ops;
 
+	register_iommu(&amd_iommu_ops);
+
+	bus_register_notifier(&pci_bus_type, &device_nb);
+
+	amd_iommu_stats_init();
+
 	return 0;
 
 free_domains:
@@ -1386,3 +1713,224 @@ free_domains:
 
 	return ret;
 }
+
+/*****************************************************************************
+ *
+ * The following functions belong to the exported interface of AMD IOMMU
+ *
+ * This interface allows access to lower level functions of the IOMMU
+ * like protection domain handling and assignement of devices to domains
+ * which is not possible with the dma_ops interface.
+ *
+ *****************************************************************************/
+
+static void cleanup_domain(struct protection_domain *domain)
+{
+	unsigned long flags;
+	u16 devid;
+
+	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+
+	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
+		if (amd_iommu_pd_table[devid] == domain)
+			__detach_device(domain, devid);
+
+	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
+static int amd_iommu_domain_init(struct iommu_domain *dom)
+{
+	struct protection_domain *domain;
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain)
+		return -ENOMEM;
+
+	spin_lock_init(&domain->lock);
+	domain->mode = PAGE_MODE_3_LEVEL;
+	domain->id = domain_id_alloc();
+	if (!domain->id)
+		goto out_free;
+	domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!domain->pt_root)
+		goto out_free;
+
+	dom->priv = domain;
+
+	return 0;
+
+out_free:
+	kfree(domain);
+
+	return -ENOMEM;
+}
+
+static void amd_iommu_domain_destroy(struct iommu_domain *dom)
+{
+	struct protection_domain *domain = dom->priv;
+
+	if (!domain)
+		return;
+
+	if (domain->dev_cnt > 0)
+		cleanup_domain(domain);
+
+	BUG_ON(domain->dev_cnt != 0);
+
+	free_pagetable(domain);
+
+	domain_id_free(domain->id);
+
+	kfree(domain);
+
+	dom->priv = NULL;
+}
+
+static void amd_iommu_detach_device(struct iommu_domain *dom,
+				    struct device *dev)
+{
+	struct protection_domain *domain = dom->priv;
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	u16 devid;
+
+	if (dev->bus != &pci_bus_type)
+		return;
+
+	pdev = to_pci_dev(dev);
+
+	devid = calc_devid(pdev->bus->number, pdev->devfn);
+
+	if (devid > 0)
+		detach_device(domain, devid);
+
+	iommu = amd_iommu_rlookup_table[devid];
+	if (!iommu)
+		return;
+
+	iommu_queue_inv_dev_entry(iommu, devid);
+	iommu_completion_wait(iommu);
+}
+
+static int amd_iommu_attach_device(struct iommu_domain *dom,
+				   struct device *dev)
+{
+	struct protection_domain *domain = dom->priv;
+	struct protection_domain *old_domain;
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	u16 devid;
+
+	if (dev->bus != &pci_bus_type)
+		return -EINVAL;
+
+	pdev = to_pci_dev(dev);
+
+	devid = calc_devid(pdev->bus->number, pdev->devfn);
+
+	if (devid >= amd_iommu_last_bdf ||
+	    devid != amd_iommu_alias_table[devid])
+		return -EINVAL;
+
+	iommu = amd_iommu_rlookup_table[devid];
+	if (!iommu)
+		return -EINVAL;
+
+	old_domain = domain_for_device(devid);
+	if (old_domain)
+		return -EBUSY;
+
+	attach_device(iommu, domain, devid);
+
+	iommu_completion_wait(iommu);
+
+	return 0;
+}
+
+static int amd_iommu_map_range(struct iommu_domain *dom,
+			       unsigned long iova, phys_addr_t paddr,
+			       size_t size, int iommu_prot)
+{
+	struct protection_domain *domain = dom->priv;
+	unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);
+	int prot = 0;
+	int ret;
+
+	if (iommu_prot & IOMMU_READ)
+		prot |= IOMMU_PROT_IR;
+	if (iommu_prot & IOMMU_WRITE)
+		prot |= IOMMU_PROT_IW;
+
+	iova &= PAGE_MASK;
+	paddr &= PAGE_MASK;
+
+	for (i = 0; i < npages; ++i) {
+		ret = iommu_map_page(domain, iova, paddr, prot);
+		if (ret)
+			return ret;
+
+		iova += PAGE_SIZE;
+		paddr += PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+static void amd_iommu_unmap_range(struct iommu_domain *dom,
+				  unsigned long iova, size_t size)
+{
+
+	struct protection_domain *domain = dom->priv;
+	unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE);
+
+	iova &= PAGE_MASK;
+
+	for (i = 0; i < npages; ++i) {
+		iommu_unmap_page(domain, iova);
+		iova += PAGE_SIZE;
+	}
+
+	iommu_flush_domain(domain->id);
+}
+
+static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
+					  unsigned long iova)
+{
+	struct protection_domain *domain = dom->priv;
+	unsigned long offset = iova & ~PAGE_MASK;
+	phys_addr_t paddr;
+	u64 *pte;
+
+	pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	paddr = *pte & IOMMU_PAGE_MASK;
+	paddr |= offset;
+
+	return paddr;
+}
+
+static struct iommu_ops amd_iommu_ops = {
+	.domain_init = amd_iommu_domain_init,
+	.domain_destroy = amd_iommu_domain_destroy,
+	.attach_dev = amd_iommu_attach_device,
+	.detach_dev = amd_iommu_detach_device,
+	.map = amd_iommu_map_range,
+	.unmap = amd_iommu_unmap_range,
+	.iova_to_phys = amd_iommu_iova_to_phys,
+};
+
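
The amd_iommu_ops table defined at the end of this file is what the new generic IOMMU layer dispatches to once register_iommu(&amd_iommu_ops) has run; the KVM device-assignment code (virt/kvm/iommu.c, wired up by the Makefile changes in this merge) is its first consumer. Purely as a hedged sketch of the lifecycle those callbacks imply — calling through the ops table directly rather than the linux/iommu.h wrappers, and not code from this merge:

/*
 * Hypothetical sketch, not part of this patch: the order in which a
 * generic-IOMMU-API consumer ends up exercising the amd_iommu_ops
 * callbacks above. Real users go through the linux/iommu.h wrappers.
 */
static int example_domain_lifecycle(struct iommu_ops *ops,
				    struct iommu_domain *dom,
				    struct device *dev)
{
	phys_addr_t phys;
	int ret;

	ret = ops->domain_init(dom);		/* set up a protection domain */
	if (ret)
		return ret;

	ret = ops->attach_dev(dom, dev);	/* device table now points at it */
	if (ret)
		goto out_destroy;

	/* identity-map one page, translate it back, then undo everything */
	ret = ops->map(dom, 0x100000, 0x100000, PAGE_SIZE,
		       IOMMU_READ | IOMMU_WRITE);
	if (!ret) {
		phys = ops->iova_to_phys(dom, 0x100000);
		pr_info("iova 0x100000 maps to %llx\n",
			(unsigned long long)phys);
		ops->unmap(dom, 0x100000, PAGE_SIZE);
	}

	ops->detach_dev(dom, dev);
out_destroy:
	ops->domain_destroy(dom);
	return ret;
}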
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index fb85e8d466cc..42c33cebf00f 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -122,7 +122,8 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
-int amd_iommu_isolate = 1;		/* if 1, device isolation is enabled */
+bool amd_iommu_isolate = true;		/* if true, device isolation is
+					   enabled */
 bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
@@ -245,12 +246,8 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 /* Function to enable the hardware */
 static void __init iommu_enable(struct amd_iommu *iommu)
 {
-	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
-	       "at %02x:%02x.%x cap 0x%hx\n",
-	       iommu->dev->bus->number,
-	       PCI_SLOT(iommu->dev->devfn),
-	       PCI_FUNC(iommu->dev->devfn),
-	       iommu->cap_ptr);
+	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
+	       dev_name(&iommu->dev->dev), iommu->cap_ptr);
 
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
@@ -1218,9 +1215,9 @@ static int __init parse_amd_iommu_options(char *str)
 {
 	for (; *str; ++str) {
 		if (strncmp(str, "isolate", 7) == 0)
-			amd_iommu_isolate = 1;
+			amd_iommu_isolate = true;
 		if (strncmp(str, "share", 5) == 0)
-			amd_iommu_isolate = 0;
+			amd_iommu_isolate = false;
 		if (strncmp(str, "fullflush", 9) == 0)
 			amd_iommu_unmap_flush = true;
 	}
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index c02343594b4d..d3ec292f00f2 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,8 +7,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
-ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+ifeq ($(CONFIG_IOMMU_API),y)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0e6aa8141dcd..cc17546a2406 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -34,6 +34,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
@@ -989,7 +990,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = !tdp_enabled;
 		break;
 	case KVM_CAP_IOMMU:
-		r = intel_iommu_found();
+		r = iommu_found();
 		break;
 	default:
 		r = 0;