aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/x86/x86_64/boot-options.txt2
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/amd_iommu_proto.h13
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h28
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/amd_gart_64.c (renamed from arch/x86/kernel/pci-gart_64.c)0
-rw-r--r--arch/x86/kernel/amd_iommu.c526
-rw-r--r--arch/x86/kernel/amd_iommu_init.c48
-rw-r--r--arch/x86/kernel/pci-iommu_table.c18
-rw-r--r--drivers/pci/intel-iommu.c1
-rw-r--r--drivers/pci/iov.c1
-rw-r--r--drivers/pci/pci.h37
-rw-r--r--include/linux/pci-ats.h52
-rw-r--r--lib/dma-debug.c18
14 files changed, 456 insertions, 291 deletions
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 092e596a1301..c54b4f503e2a 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -206,7 +206,7 @@ IOMMU (input/output memory management unit)
206 (e.g. because you have < 3 GB memory). 206 (e.g. because you have < 3 GB memory).
207 Kernel boot message: "PCI-DMA: Disabling IOMMU" 207 Kernel boot message: "PCI-DMA: Disabling IOMMU"
208 208
209 2. <arch/x86_64/kernel/pci-gart.c>: AMD GART based hardware IOMMU. 209 2. <arch/x86/kernel/amd_gart_64.c>: AMD GART based hardware IOMMU.
210 Kernel boot message: "PCI-DMA: using GART IOMMU" 210 Kernel boot message: "PCI-DMA: using GART IOMMU"
211 211
212 3. <arch/x86_64/kernel/pci-swiotlb.c> : Software IOMMU implementation. Used 212 3. <arch/x86_64/kernel/pci-swiotlb.c> : Software IOMMU implementation. Used
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b9ea9a80faa5..650bb8c47eca 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -689,6 +689,7 @@ config AMD_IOMMU
689 bool "AMD IOMMU support" 689 bool "AMD IOMMU support"
690 select SWIOTLB 690 select SWIOTLB
691 select PCI_MSI 691 select PCI_MSI
692 select PCI_IOV
692 depends on X86_64 && PCI && ACPI 693 depends on X86_64 && PCI && ACPI
693 ---help--- 694 ---help---
694 With this option you can enable support for AMD IOMMU hardware in 695 With this option you can enable support for AMD IOMMU hardware in
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index 916bc8111a01..55d95eb789b3 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -19,13 +19,12 @@
19#ifndef _ASM_X86_AMD_IOMMU_PROTO_H 19#ifndef _ASM_X86_AMD_IOMMU_PROTO_H
20#define _ASM_X86_AMD_IOMMU_PROTO_H 20#define _ASM_X86_AMD_IOMMU_PROTO_H
21 21
22struct amd_iommu; 22#include <asm/amd_iommu_types.h>
23 23
24extern int amd_iommu_init_dma_ops(void); 24extern int amd_iommu_init_dma_ops(void);
25extern int amd_iommu_init_passthrough(void); 25extern int amd_iommu_init_passthrough(void);
26extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
26extern irqreturn_t amd_iommu_int_handler(int irq, void *data); 27extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
27extern void amd_iommu_flush_all_domains(void);
28extern void amd_iommu_flush_all_devices(void);
29extern void amd_iommu_apply_erratum_63(u16 devid); 28extern void amd_iommu_apply_erratum_63(u16 devid);
30extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); 29extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
31extern int amd_iommu_init_devices(void); 30extern int amd_iommu_init_devices(void);
@@ -44,4 +43,12 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev)
44 (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); 43 (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
45} 44}
46 45
46static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
47{
48 if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
49 return false;
50
51 return !!(iommu->features & f);
52}
53
47#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ 54#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index e3509fc303bf..4c9982995414 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -68,12 +68,25 @@
68#define MMIO_CONTROL_OFFSET 0x0018 68#define MMIO_CONTROL_OFFSET 0x0018
69#define MMIO_EXCL_BASE_OFFSET 0x0020 69#define MMIO_EXCL_BASE_OFFSET 0x0020
70#define MMIO_EXCL_LIMIT_OFFSET 0x0028 70#define MMIO_EXCL_LIMIT_OFFSET 0x0028
71#define MMIO_EXT_FEATURES 0x0030
71#define MMIO_CMD_HEAD_OFFSET 0x2000 72#define MMIO_CMD_HEAD_OFFSET 0x2000
72#define MMIO_CMD_TAIL_OFFSET 0x2008 73#define MMIO_CMD_TAIL_OFFSET 0x2008
73#define MMIO_EVT_HEAD_OFFSET 0x2010 74#define MMIO_EVT_HEAD_OFFSET 0x2010
74#define MMIO_EVT_TAIL_OFFSET 0x2018 75#define MMIO_EVT_TAIL_OFFSET 0x2018
75#define MMIO_STATUS_OFFSET 0x2020 76#define MMIO_STATUS_OFFSET 0x2020
76 77
78
79/* Extended Feature Bits */
80#define FEATURE_PREFETCH (1ULL<<0)
81#define FEATURE_PPR (1ULL<<1)
82#define FEATURE_X2APIC (1ULL<<2)
83#define FEATURE_NX (1ULL<<3)
84#define FEATURE_GT (1ULL<<4)
85#define FEATURE_IA (1ULL<<6)
86#define FEATURE_GA (1ULL<<7)
87#define FEATURE_HE (1ULL<<8)
88#define FEATURE_PC (1ULL<<9)
89
77/* MMIO status bits */ 90/* MMIO status bits */
78#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 91#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04
79 92
@@ -113,7 +126,9 @@
113/* command specific defines */ 126/* command specific defines */
114#define CMD_COMPL_WAIT 0x01 127#define CMD_COMPL_WAIT 0x01
115#define CMD_INV_DEV_ENTRY 0x02 128#define CMD_INV_DEV_ENTRY 0x02
116#define CMD_INV_IOMMU_PAGES 0x03 129#define CMD_INV_IOMMU_PAGES 0x03
130#define CMD_INV_IOTLB_PAGES 0x04
131#define CMD_INV_ALL 0x08
117 132
118#define CMD_COMPL_WAIT_STORE_MASK 0x01 133#define CMD_COMPL_WAIT_STORE_MASK 0x01
119#define CMD_COMPL_WAIT_INT_MASK 0x02 134#define CMD_COMPL_WAIT_INT_MASK 0x02
@@ -215,6 +230,8 @@
215#define IOMMU_PTE_IR (1ULL << 61) 230#define IOMMU_PTE_IR (1ULL << 61)
216#define IOMMU_PTE_IW (1ULL << 62) 231#define IOMMU_PTE_IW (1ULL << 62)
217 232
233#define DTE_FLAG_IOTLB 0x01
234
218#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) 235#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
219#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) 236#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
220#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) 237#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
@@ -227,6 +244,7 @@
227/* IOMMU capabilities */ 244/* IOMMU capabilities */
228#define IOMMU_CAP_IOTLB 24 245#define IOMMU_CAP_IOTLB 24
229#define IOMMU_CAP_NPCACHE 26 246#define IOMMU_CAP_NPCACHE 26
247#define IOMMU_CAP_EFR 27
230 248
231#define MAX_DOMAIN_ID 65536 249#define MAX_DOMAIN_ID 65536
232 250
@@ -249,6 +267,8 @@ extern bool amd_iommu_dump;
249 267
250/* global flag if IOMMUs cache non-present entries */ 268/* global flag if IOMMUs cache non-present entries */
251extern bool amd_iommu_np_cache; 269extern bool amd_iommu_np_cache;
270/* Only true if all IOMMUs support device IOTLBs */
271extern bool amd_iommu_iotlb_sup;
252 272
253/* 273/*
254 * Make iterating over all IOMMUs easier 274 * Make iterating over all IOMMUs easier
@@ -371,6 +391,9 @@ struct amd_iommu {
371 /* flags read from acpi table */ 391 /* flags read from acpi table */
372 u8 acpi_flags; 392 u8 acpi_flags;
373 393
394 /* Extended features */
395 u64 features;
396
374 /* 397 /*
375 * Capability pointer. There could be more than one IOMMU per PCI 398 * Capability pointer. There could be more than one IOMMU per PCI
376 * device function if there are more than one AMD IOMMU capability 399 * device function if there are more than one AMD IOMMU capability
@@ -409,9 +432,6 @@ struct amd_iommu {
409 /* if one, we need to send a completion wait command */ 432 /* if one, we need to send a completion wait command */
410 bool need_sync; 433 bool need_sync;
411 434
412 /* becomes true if a command buffer reset is running */
413 bool reset_in_progress;
414
415 /* default dma_ops domain for that IOMMU */ 435 /* default dma_ops domain for that IOMMU */
416 struct dma_ops_domain *default_dom; 436 struct dma_ops_domain *default_dom;
417 437
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7338ef2218bc..97ebf82e0b7f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -117,7 +117,7 @@ obj-$(CONFIG_OF) += devicetree.o
117ifeq ($(CONFIG_X86_64),y) 117ifeq ($(CONFIG_X86_64),y)
118 obj-$(CONFIG_AUDIT) += audit_64.o 118 obj-$(CONFIG_AUDIT) += audit_64.o
119 119
120 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o 120 obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o
121 obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o 121 obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
122 obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o 122 obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
123 123
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b117efd24f71..b117efd24f71 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 57ca77787220..873e7e1ead7b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -18,6 +18,7 @@
18 */ 18 */
19 19
20#include <linux/pci.h> 20#include <linux/pci.h>
21#include <linux/pci-ats.h>
21#include <linux/bitmap.h> 22#include <linux/bitmap.h>
22#include <linux/slab.h> 23#include <linux/slab.h>
23#include <linux/debugfs.h> 24#include <linux/debugfs.h>
@@ -25,6 +26,7 @@
25#include <linux/dma-mapping.h> 26#include <linux/dma-mapping.h>
26#include <linux/iommu-helper.h> 27#include <linux/iommu-helper.h>
27#include <linux/iommu.h> 28#include <linux/iommu.h>
29#include <linux/delay.h>
28#include <asm/proto.h> 30#include <asm/proto.h>
29#include <asm/iommu.h> 31#include <asm/iommu.h>
30#include <asm/gart.h> 32#include <asm/gart.h>
@@ -34,7 +36,7 @@
34 36
35#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) 37#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
36 38
37#define EXIT_LOOP_COUNT 10000000 39#define LOOP_TIMEOUT 100000
38 40
39static DEFINE_RWLOCK(amd_iommu_devtable_lock); 41static DEFINE_RWLOCK(amd_iommu_devtable_lock);
40 42
@@ -57,7 +59,6 @@ struct iommu_cmd {
57 u32 data[4]; 59 u32 data[4];
58}; 60};
59 61
60static void reset_iommu_command_buffer(struct amd_iommu *iommu);
61static void update_domain(struct protection_domain *domain); 62static void update_domain(struct protection_domain *domain);
62 63
63/**************************************************************************** 64/****************************************************************************
@@ -322,8 +323,6 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
322 break; 323 break;
323 case EVENT_TYPE_ILL_CMD: 324 case EVENT_TYPE_ILL_CMD:
324 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); 325 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
325 iommu->reset_in_progress = true;
326 reset_iommu_command_buffer(iommu);
327 dump_command(address); 326 dump_command(address);
328 break; 327 break;
329 case EVENT_TYPE_CMD_HARD_ERR: 328 case EVENT_TYPE_CMD_HARD_ERR:
@@ -367,7 +366,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
367 spin_unlock_irqrestore(&iommu->lock, flags); 366 spin_unlock_irqrestore(&iommu->lock, flags);
368} 367}
369 368
370irqreturn_t amd_iommu_int_handler(int irq, void *data) 369irqreturn_t amd_iommu_int_thread(int irq, void *data)
371{ 370{
372 struct amd_iommu *iommu; 371 struct amd_iommu *iommu;
373 372
@@ -377,192 +376,300 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
377 return IRQ_HANDLED; 376 return IRQ_HANDLED;
378} 377}
379 378
379irqreturn_t amd_iommu_int_handler(int irq, void *data)
380{
381 return IRQ_WAKE_THREAD;
382}
383
380/**************************************************************************** 384/****************************************************************************
381 * 385 *
382 * IOMMU command queuing functions 386 * IOMMU command queuing functions
383 * 387 *
384 ****************************************************************************/ 388 ****************************************************************************/
385 389
386/* 390static int wait_on_sem(volatile u64 *sem)
387 * Writes the command to the IOMMUs command buffer and informs the 391{
388 * hardware about the new command. Must be called with iommu->lock held. 392 int i = 0;
389 */ 393
390static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) 394 while (*sem == 0 && i < LOOP_TIMEOUT) {
395 udelay(1);
396 i += 1;
397 }
398
399 if (i == LOOP_TIMEOUT) {
400 pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
401 return -EIO;
402 }
403
404 return 0;
405}
406
407static void copy_cmd_to_buffer(struct amd_iommu *iommu,
408 struct iommu_cmd *cmd,
409 u32 tail)
391{ 410{
392 u32 tail, head;
393 u8 *target; 411 u8 *target;
394 412
395 WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
396 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
397 target = iommu->cmd_buf + tail; 413 target = iommu->cmd_buf + tail;
398 memcpy_toio(target, cmd, sizeof(*cmd)); 414 tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
399 tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; 415
400 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 416 /* Copy command to buffer */
401 if (tail == head) 417 memcpy(target, cmd, sizeof(*cmd));
402 return -ENOMEM; 418
419 /* Tell the IOMMU about it */
403 writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 420 writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
421}
404 422
405 return 0; 423static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
424{
425 WARN_ON(address & 0x7ULL);
426
427 memset(cmd, 0, sizeof(*cmd));
428 cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK;
429 cmd->data[1] = upper_32_bits(__pa(address));
430 cmd->data[2] = 1;
431 CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
432}
433
434static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
435{
436 memset(cmd, 0, sizeof(*cmd));
437 cmd->data[0] = devid;
438 CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
439}
440
441static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
442 size_t size, u16 domid, int pde)
443{
444 u64 pages;
445 int s;
446
447 pages = iommu_num_pages(address, size, PAGE_SIZE);
448 s = 0;
449
450 if (pages > 1) {
451 /*
452 * If we have to flush more than one page, flush all
453 * TLB entries for this domain
454 */
455 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
456 s = 1;
457 }
458
459 address &= PAGE_MASK;
460
461 memset(cmd, 0, sizeof(*cmd));
462 cmd->data[1] |= domid;
463 cmd->data[2] = lower_32_bits(address);
464 cmd->data[3] = upper_32_bits(address);
465 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
466 if (s) /* size bit - we flush more than one 4kb page */
467 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
468 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
469 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
470}
471
472static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
473 u64 address, size_t size)
474{
475 u64 pages;
476 int s;
477
478 pages = iommu_num_pages(address, size, PAGE_SIZE);
479 s = 0;
480
481 if (pages > 1) {
482 /*
483 * If we have to flush more than one page, flush all
484 * TLB entries for this domain
485 */
486 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
487 s = 1;
488 }
489
490 address &= PAGE_MASK;
491
492 memset(cmd, 0, sizeof(*cmd));
493 cmd->data[0] = devid;
494 cmd->data[0] |= (qdep & 0xff) << 24;
495 cmd->data[1] = devid;
496 cmd->data[2] = lower_32_bits(address);
497 cmd->data[3] = upper_32_bits(address);
498 CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
499 if (s)
500 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
501}
502
503static void build_inv_all(struct iommu_cmd *cmd)
504{
505 memset(cmd, 0, sizeof(*cmd));
506 CMD_SET_TYPE(cmd, CMD_INV_ALL);
406} 507}
407 508
408/* 509/*
409 * General queuing function for commands. Takes iommu->lock and calls 510 * Writes the command to the IOMMUs command buffer and informs the
410 * __iommu_queue_command(). 511 * hardware about the new command.
411 */ 512 */
412static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) 513static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
413{ 514{
515 u32 left, tail, head, next_tail;
414 unsigned long flags; 516 unsigned long flags;
415 int ret;
416 517
518 WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
519
520again:
417 spin_lock_irqsave(&iommu->lock, flags); 521 spin_lock_irqsave(&iommu->lock, flags);
418 ret = __iommu_queue_command(iommu, cmd);
419 if (!ret)
420 iommu->need_sync = true;
421 spin_unlock_irqrestore(&iommu->lock, flags);
422 522
423 return ret; 523 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
424} 524 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
525 next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
526 left = (head - next_tail) % iommu->cmd_buf_size;
425 527
426/* 528 if (left <= 2) {
427 * This function waits until an IOMMU has completed a completion 529 struct iommu_cmd sync_cmd;
428 * wait command 530 volatile u64 sem = 0;
429 */ 531 int ret;
430static void __iommu_wait_for_completion(struct amd_iommu *iommu)
431{
432 int ready = 0;
433 unsigned status = 0;
434 unsigned long i = 0;
435 532
436 INC_STATS_COUNTER(compl_wait); 533 build_completion_wait(&sync_cmd, (u64)&sem);
534 copy_cmd_to_buffer(iommu, &sync_cmd, tail);
437 535
438 while (!ready && (i < EXIT_LOOP_COUNT)) { 536 spin_unlock_irqrestore(&iommu->lock, flags);
439 ++i; 537
440 /* wait for the bit to become one */ 538 if ((ret = wait_on_sem(&sem)) != 0)
441 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 539 return ret;
442 ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; 540
541 goto again;
443 } 542 }
444 543
445 /* set bit back to zero */ 544 copy_cmd_to_buffer(iommu, cmd, tail);
446 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; 545
447 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); 546 /* We need to sync now to make sure all commands are processed */
547 iommu->need_sync = true;
548
549 spin_unlock_irqrestore(&iommu->lock, flags);
448 550
449 if (unlikely(i == EXIT_LOOP_COUNT)) 551 return 0;
450 iommu->reset_in_progress = true;
451} 552}
452 553
453/* 554/*
454 * This function queues a completion wait command into the command 555 * This function queues a completion wait command into the command
455 * buffer of an IOMMU 556 * buffer of an IOMMU
456 */ 557 */
457static int __iommu_completion_wait(struct amd_iommu *iommu) 558static int iommu_completion_wait(struct amd_iommu *iommu)
458{ 559{
459 struct iommu_cmd cmd; 560 struct iommu_cmd cmd;
561 volatile u64 sem = 0;
562 int ret;
460 563
461 memset(&cmd, 0, sizeof(cmd)); 564 if (!iommu->need_sync)
462 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; 565 return 0;
463 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
464 566
465 return __iommu_queue_command(iommu, &cmd); 567 build_completion_wait(&cmd, (u64)&sem);
568
569 ret = iommu_queue_command(iommu, &cmd);
570 if (ret)
571 return ret;
572
573 return wait_on_sem(&sem);
466} 574}
467 575
468/* 576static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
469 * This function is called whenever we need to ensure that the IOMMU has
470 * completed execution of all commands we sent. It sends a
471 * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
472 * us about that by writing a value to a physical address we pass with
473 * the command.
474 */
475static int iommu_completion_wait(struct amd_iommu *iommu)
476{ 577{
477 int ret = 0; 578 struct iommu_cmd cmd;
478 unsigned long flags;
479 579
480 spin_lock_irqsave(&iommu->lock, flags); 580 build_inv_dte(&cmd, devid);
481 581
482 if (!iommu->need_sync) 582 return iommu_queue_command(iommu, &cmd);
483 goto out; 583}
484 584
485 ret = __iommu_completion_wait(iommu); 585static void iommu_flush_dte_all(struct amd_iommu *iommu)
586{
587 u32 devid;
486 588
487 iommu->need_sync = false; 589 for (devid = 0; devid <= 0xffff; ++devid)
590 iommu_flush_dte(iommu, devid);
488 591
489 if (ret) 592 iommu_completion_wait(iommu);
490 goto out; 593}
491
492 __iommu_wait_for_completion(iommu);
493 594
494out: 595/*
495 spin_unlock_irqrestore(&iommu->lock, flags); 596 * This function uses heavy locking and may disable irqs for some time. But
597 * this is no issue because it is only called during resume.
598 */
599static void iommu_flush_tlb_all(struct amd_iommu *iommu)
600{
601 u32 dom_id;
496 602
497 if (iommu->reset_in_progress) 603 for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
498 reset_iommu_command_buffer(iommu); 604 struct iommu_cmd cmd;
605 build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
606 dom_id, 1);
607 iommu_queue_command(iommu, &cmd);
608 }
499 609
500 return 0; 610 iommu_completion_wait(iommu);
501} 611}
502 612
503static void iommu_flush_complete(struct protection_domain *domain) 613static void iommu_flush_all(struct amd_iommu *iommu)
504{ 614{
505 int i; 615 struct iommu_cmd cmd;
506 616
507 for (i = 0; i < amd_iommus_present; ++i) { 617 build_inv_all(&cmd);
508 if (!domain->dev_iommu[i])
509 continue;
510 618
511 /* 619 iommu_queue_command(iommu, &cmd);
512 * Devices of this domain are behind this IOMMU 620 iommu_completion_wait(iommu);
513 * We need to wait for completion of all commands. 621}
514 */ 622
515 iommu_completion_wait(amd_iommus[i]); 623void iommu_flush_all_caches(struct amd_iommu *iommu)
624{
625 if (iommu_feature(iommu, FEATURE_IA)) {
626 iommu_flush_all(iommu);
627 } else {
628 iommu_flush_dte_all(iommu);
629 iommu_flush_tlb_all(iommu);
516 } 630 }
517} 631}
518 632
519/* 633/*
520 * Command send function for invalidating a device table entry 634 * Command send function for flushing on-device TLB
521 */ 635 */
522static int iommu_flush_device(struct device *dev) 636static int device_flush_iotlb(struct device *dev, u64 address, size_t size)
523{ 637{
638 struct pci_dev *pdev = to_pci_dev(dev);
524 struct amd_iommu *iommu; 639 struct amd_iommu *iommu;
525 struct iommu_cmd cmd; 640 struct iommu_cmd cmd;
526 u16 devid; 641 u16 devid;
642 int qdep;
527 643
644 qdep = pci_ats_queue_depth(pdev);
528 devid = get_device_id(dev); 645 devid = get_device_id(dev);
529 iommu = amd_iommu_rlookup_table[devid]; 646 iommu = amd_iommu_rlookup_table[devid];
530 647
531 /* Build command */ 648 build_inv_iotlb_pages(&cmd, devid, qdep, address, size);
532 memset(&cmd, 0, sizeof(cmd));
533 CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
534 cmd.data[0] = devid;
535 649
536 return iommu_queue_command(iommu, &cmd); 650 return iommu_queue_command(iommu, &cmd);
537} 651}
538 652
539static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
540 u16 domid, int pde, int s)
541{
542 memset(cmd, 0, sizeof(*cmd));
543 address &= PAGE_MASK;
544 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
545 cmd->data[1] |= domid;
546 cmd->data[2] = lower_32_bits(address);
547 cmd->data[3] = upper_32_bits(address);
548 if (s) /* size bit - we flush more than one 4kb page */
549 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
550 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
551 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
552}
553
554/* 653/*
555 * Generic command send function for invalidaing TLB entries 654 * Command send function for invalidating a device table entry
556 */ 655 */
557static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, 656static int device_flush_dte(struct device *dev)
558 u64 address, u16 domid, int pde, int s)
559{ 657{
560 struct iommu_cmd cmd; 658 struct amd_iommu *iommu;
659 struct pci_dev *pdev;
660 u16 devid;
561 int ret; 661 int ret;
562 662
563 __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s); 663 pdev = to_pci_dev(dev);
664 devid = get_device_id(dev);
665 iommu = amd_iommu_rlookup_table[devid];
564 666
565 ret = iommu_queue_command(iommu, &cmd); 667 ret = iommu_flush_dte(iommu, devid);
668 if (ret)
669 return ret;
670
671 if (pci_ats_enabled(pdev))
672 ret = device_flush_iotlb(dev, 0, ~0UL);
566 673
567 return ret; 674 return ret;
568} 675}
@@ -572,23 +679,14 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
572 * It invalidates a single PTE if the range to flush is within a single 679 * It invalidates a single PTE if the range to flush is within a single
573 * page. Otherwise it flushes the whole TLB of the IOMMU. 680 * page. Otherwise it flushes the whole TLB of the IOMMU.
574 */ 681 */
575static void __iommu_flush_pages(struct protection_domain *domain, 682static void __domain_flush_pages(struct protection_domain *domain,
576 u64 address, size_t size, int pde) 683 u64 address, size_t size, int pde)
577{ 684{
578 int s = 0, i; 685 struct iommu_dev_data *dev_data;
579 unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE); 686 struct iommu_cmd cmd;
580 687 int ret = 0, i;
581 address &= PAGE_MASK;
582
583 if (pages > 1) {
584 /*
585 * If we have to flush more than one page, flush all
586 * TLB entries for this domain
587 */
588 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
589 s = 1;
590 }
591 688
689 build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
592 690
593 for (i = 0; i < amd_iommus_present; ++i) { 691 for (i = 0; i < amd_iommus_present; ++i) {
594 if (!domain->dev_iommu[i]) 692 if (!domain->dev_iommu[i])
@@ -598,101 +696,70 @@ static void __iommu_flush_pages(struct protection_domain *domain,
598 * Devices of this domain are behind this IOMMU 696 * Devices of this domain are behind this IOMMU
599 * We need a TLB flush 697 * We need a TLB flush
600 */ 698 */
601 iommu_queue_inv_iommu_pages(amd_iommus[i], address, 699 ret |= iommu_queue_command(amd_iommus[i], &cmd);
602 domain->id, pde, s); 700 }
701
702 list_for_each_entry(dev_data, &domain->dev_list, list) {
703 struct pci_dev *pdev = to_pci_dev(dev_data->dev);
704
705 if (!pci_ats_enabled(pdev))
706 continue;
707
708 ret |= device_flush_iotlb(dev_data->dev, address, size);
603 } 709 }
604 710
605 return; 711 WARN_ON(ret);
606} 712}
607 713
608static void iommu_flush_pages(struct protection_domain *domain, 714static void domain_flush_pages(struct protection_domain *domain,
609 u64 address, size_t size) 715 u64 address, size_t size)
610{ 716{
611 __iommu_flush_pages(domain, address, size, 0); 717 __domain_flush_pages(domain, address, size, 0);
612} 718}
613 719
614/* Flush the whole IO/TLB for a given protection domain */ 720/* Flush the whole IO/TLB for a given protection domain */
615static void iommu_flush_tlb(struct protection_domain *domain) 721static void domain_flush_tlb(struct protection_domain *domain)
616{ 722{
617 __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); 723 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
618} 724}
619 725
620/* Flush the whole IO/TLB for a given protection domain - including PDE */ 726/* Flush the whole IO/TLB for a given protection domain - including PDE */
621static void iommu_flush_tlb_pde(struct protection_domain *domain) 727static void domain_flush_tlb_pde(struct protection_domain *domain)
622{ 728{
623 __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); 729 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
624}
625
626
627/*
628 * This function flushes the DTEs for all devices in domain
629 */
630static void iommu_flush_domain_devices(struct protection_domain *domain)
631{
632 struct iommu_dev_data *dev_data;
633 unsigned long flags;
634
635 spin_lock_irqsave(&domain->lock, flags);
636
637 list_for_each_entry(dev_data, &domain->dev_list, list)
638 iommu_flush_device(dev_data->dev);
639
640 spin_unlock_irqrestore(&domain->lock, flags);
641} 730}
642 731
643static void iommu_flush_all_domain_devices(void) 732static void domain_flush_complete(struct protection_domain *domain)
644{ 733{
645 struct protection_domain *domain; 734 int i;
646 unsigned long flags;
647 735
648 spin_lock_irqsave(&amd_iommu_pd_lock, flags); 736 for (i = 0; i < amd_iommus_present; ++i) {
737 if (!domain->dev_iommu[i])
738 continue;
649 739
650 list_for_each_entry(domain, &amd_iommu_pd_list, list) { 740 /*
651 iommu_flush_domain_devices(domain); 741 * Devices of this domain are behind this IOMMU
652 iommu_flush_complete(domain); 742 * We need to wait for completion of all commands.
743 */
744 iommu_completion_wait(amd_iommus[i]);
653 } 745 }
654
655 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
656} 746}
657 747
658void amd_iommu_flush_all_devices(void)
659{
660 iommu_flush_all_domain_devices();
661}
662 748
663/* 749/*
664 * This function uses heavy locking and may disable irqs for some time. But 750 * This function flushes the DTEs for all devices in domain
665 * this is no issue because it is only called during resume.
666 */ 751 */
667void amd_iommu_flush_all_domains(void) 752static void domain_flush_devices(struct protection_domain *domain)
668{ 753{
669 struct protection_domain *domain; 754 struct iommu_dev_data *dev_data;
670 unsigned long flags; 755 unsigned long flags;
671 756
672 spin_lock_irqsave(&amd_iommu_pd_lock, flags); 757 spin_lock_irqsave(&domain->lock, flags);
673
674 list_for_each_entry(domain, &amd_iommu_pd_list, list) {
675 spin_lock(&domain->lock);
676 iommu_flush_tlb_pde(domain);
677 iommu_flush_complete(domain);
678 spin_unlock(&domain->lock);
679 }
680
681 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
682}
683
684static void reset_iommu_command_buffer(struct amd_iommu *iommu)
685{
686 pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
687
688 if (iommu->reset_in_progress)
689 panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
690 758
691 amd_iommu_reset_cmd_buffer(iommu); 759 list_for_each_entry(dev_data, &domain->dev_list, list)
692 amd_iommu_flush_all_devices(); 760 device_flush_dte(dev_data->dev);
693 amd_iommu_flush_all_domains();
694 761
695 iommu->reset_in_progress = false; 762 spin_unlock_irqrestore(&domain->lock, flags);
696} 763}
697 764
698/**************************************************************************** 765/****************************************************************************
@@ -1410,17 +1477,22 @@ static bool dma_ops_domain(struct protection_domain *domain)
1410 return domain->flags & PD_DMA_OPS_MASK; 1477 return domain->flags & PD_DMA_OPS_MASK;
1411} 1478}
1412 1479
1413static void set_dte_entry(u16 devid, struct protection_domain *domain) 1480static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
1414{ 1481{
1415 u64 pte_root = virt_to_phys(domain->pt_root); 1482 u64 pte_root = virt_to_phys(domain->pt_root);
1483 u32 flags = 0;
1416 1484
1417 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) 1485 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
1418 << DEV_ENTRY_MODE_SHIFT; 1486 << DEV_ENTRY_MODE_SHIFT;
1419 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; 1487 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
1420 1488
1421 amd_iommu_dev_table[devid].data[2] = domain->id; 1489 if (ats)
1422 amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); 1490 flags |= DTE_FLAG_IOTLB;
1423 amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); 1491
1492 amd_iommu_dev_table[devid].data[3] |= flags;
1493 amd_iommu_dev_table[devid].data[2] = domain->id;
1494 amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
1495 amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
1424} 1496}
1425 1497
1426static void clear_dte_entry(u16 devid) 1498static void clear_dte_entry(u16 devid)
@@ -1437,34 +1509,42 @@ static void do_attach(struct device *dev, struct protection_domain *domain)
1437{ 1509{
1438 struct iommu_dev_data *dev_data; 1510 struct iommu_dev_data *dev_data;
1439 struct amd_iommu *iommu; 1511 struct amd_iommu *iommu;
1512 struct pci_dev *pdev;
1513 bool ats = false;
1440 u16 devid; 1514 u16 devid;
1441 1515
1442 devid = get_device_id(dev); 1516 devid = get_device_id(dev);
1443 iommu = amd_iommu_rlookup_table[devid]; 1517 iommu = amd_iommu_rlookup_table[devid];
1444 dev_data = get_dev_data(dev); 1518 dev_data = get_dev_data(dev);
1519 pdev = to_pci_dev(dev);
1520
1521 if (amd_iommu_iotlb_sup)
1522 ats = pci_ats_enabled(pdev);
1445 1523
1446 /* Update data structures */ 1524 /* Update data structures */
1447 dev_data->domain = domain; 1525 dev_data->domain = domain;
1448 list_add(&dev_data->list, &domain->dev_list); 1526 list_add(&dev_data->list, &domain->dev_list);
1449 set_dte_entry(devid, domain); 1527 set_dte_entry(devid, domain, ats);
1450 1528
1451 /* Do reference counting */ 1529 /* Do reference counting */
1452 domain->dev_iommu[iommu->index] += 1; 1530 domain->dev_iommu[iommu->index] += 1;
1453 domain->dev_cnt += 1; 1531 domain->dev_cnt += 1;
1454 1532
1455 /* Flush the DTE entry */ 1533 /* Flush the DTE entry */
1456 iommu_flush_device(dev); 1534 device_flush_dte(dev);
1457} 1535}
1458 1536
1459static void do_detach(struct device *dev) 1537static void do_detach(struct device *dev)
1460{ 1538{
1461 struct iommu_dev_data *dev_data; 1539 struct iommu_dev_data *dev_data;
1462 struct amd_iommu *iommu; 1540 struct amd_iommu *iommu;
1541 struct pci_dev *pdev;
1463 u16 devid; 1542 u16 devid;
1464 1543
1465 devid = get_device_id(dev); 1544 devid = get_device_id(dev);
1466 iommu = amd_iommu_rlookup_table[devid]; 1545 iommu = amd_iommu_rlookup_table[devid];
1467 dev_data = get_dev_data(dev); 1546 dev_data = get_dev_data(dev);
1547 pdev = to_pci_dev(dev);
1468 1548
1469 /* decrease reference counters */ 1549 /* decrease reference counters */
1470 dev_data->domain->dev_iommu[iommu->index] -= 1; 1550 dev_data->domain->dev_iommu[iommu->index] -= 1;
@@ -1476,7 +1556,7 @@ static void do_detach(struct device *dev)
1476 clear_dte_entry(devid); 1556 clear_dte_entry(devid);
1477 1557
1478 /* Flush the DTE entry */ 1558 /* Flush the DTE entry */
1479 iommu_flush_device(dev); 1559 device_flush_dte(dev);
1480} 1560}
1481 1561
1482/* 1562/*
@@ -1539,9 +1619,13 @@ out_unlock:
1539static int attach_device(struct device *dev, 1619static int attach_device(struct device *dev,
1540 struct protection_domain *domain) 1620 struct protection_domain *domain)
1541{ 1621{
1622 struct pci_dev *pdev = to_pci_dev(dev);
1542 unsigned long flags; 1623 unsigned long flags;
1543 int ret; 1624 int ret;
1544 1625
1626 if (amd_iommu_iotlb_sup)
1627 pci_enable_ats(pdev, PAGE_SHIFT);
1628
1545 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 1629 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1546 ret = __attach_device(dev, domain); 1630 ret = __attach_device(dev, domain);
1547 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1631 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
@@ -1551,7 +1635,7 @@ static int attach_device(struct device *dev,
1551 * left the caches in the IOMMU dirty. So we have to flush 1635 * left the caches in the IOMMU dirty. So we have to flush
1552 * here to evict all dirty stuff. 1636 * here to evict all dirty stuff.
1553 */ 1637 */
1554 iommu_flush_tlb_pde(domain); 1638 domain_flush_tlb_pde(domain);
1555 1639
1556 return ret; 1640 return ret;
1557} 1641}
@@ -1598,12 +1682,16 @@ static void __detach_device(struct device *dev)
1598 */ 1682 */
1599static void detach_device(struct device *dev) 1683static void detach_device(struct device *dev)
1600{ 1684{
1685 struct pci_dev *pdev = to_pci_dev(dev);
1601 unsigned long flags; 1686 unsigned long flags;
1602 1687
1603 /* lock device table */ 1688 /* lock device table */
1604 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 1689 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1605 __detach_device(dev); 1690 __detach_device(dev);
1606 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1691 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1692
1693 if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev))
1694 pci_disable_ats(pdev);
1607} 1695}
1608 1696
1609/* 1697/*
@@ -1692,7 +1780,7 @@ static int device_change_notifier(struct notifier_block *nb,
1692 goto out; 1780 goto out;
1693 } 1781 }
1694 1782
1695 iommu_flush_device(dev); 1783 device_flush_dte(dev);
1696 iommu_completion_wait(iommu); 1784 iommu_completion_wait(iommu);
1697 1785
1698out: 1786out:
@@ -1753,8 +1841,9 @@ static void update_device_table(struct protection_domain *domain)
1753 struct iommu_dev_data *dev_data; 1841 struct iommu_dev_data *dev_data;
1754 1842
1755 list_for_each_entry(dev_data, &domain->dev_list, list) { 1843 list_for_each_entry(dev_data, &domain->dev_list, list) {
1844 struct pci_dev *pdev = to_pci_dev(dev_data->dev);
1756 u16 devid = get_device_id(dev_data->dev); 1845 u16 devid = get_device_id(dev_data->dev);
1757 set_dte_entry(devid, domain); 1846 set_dte_entry(devid, domain, pci_ats_enabled(pdev));
1758 } 1847 }
1759} 1848}
1760 1849
@@ -1764,8 +1853,9 @@ static void update_domain(struct protection_domain *domain)
1764 return; 1853 return;
1765 1854
1766 update_device_table(domain); 1855 update_device_table(domain);
1767 iommu_flush_domain_devices(domain); 1856
1768 iommu_flush_tlb_pde(domain); 1857 domain_flush_devices(domain);
1858 domain_flush_tlb_pde(domain);
1769 1859
1770 domain->updated = false; 1860 domain->updated = false;
1771} 1861}
@@ -1924,10 +2014,10 @@ retry:
1924 ADD_STATS_COUNTER(alloced_io_mem, size); 2014 ADD_STATS_COUNTER(alloced_io_mem, size);
1925 2015
1926 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { 2016 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
1927 iommu_flush_tlb(&dma_dom->domain); 2017 domain_flush_tlb(&dma_dom->domain);
1928 dma_dom->need_flush = false; 2018 dma_dom->need_flush = false;
1929 } else if (unlikely(amd_iommu_np_cache)) 2019 } else if (unlikely(amd_iommu_np_cache))
1930 iommu_flush_pages(&dma_dom->domain, address, size); 2020 domain_flush_pages(&dma_dom->domain, address, size);
1931 2021
1932out: 2022out:
1933 return address; 2023 return address;
@@ -1976,7 +2066,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
1976 dma_ops_free_addresses(dma_dom, dma_addr, pages); 2066 dma_ops_free_addresses(dma_dom, dma_addr, pages);
1977 2067
1978 if (amd_iommu_unmap_flush || dma_dom->need_flush) { 2068 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
1979 iommu_flush_pages(&dma_dom->domain, flush_addr, size); 2069 domain_flush_pages(&dma_dom->domain, flush_addr, size);
1980 dma_dom->need_flush = false; 2070 dma_dom->need_flush = false;
1981 } 2071 }
1982} 2072}
@@ -2012,7 +2102,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
2012 if (addr == DMA_ERROR_CODE) 2102 if (addr == DMA_ERROR_CODE)
2013 goto out; 2103 goto out;
2014 2104
2015 iommu_flush_complete(domain); 2105 domain_flush_complete(domain);
2016 2106
2017out: 2107out:
2018 spin_unlock_irqrestore(&domain->lock, flags); 2108 spin_unlock_irqrestore(&domain->lock, flags);
@@ -2039,7 +2129,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
2039 2129
2040 __unmap_single(domain->priv, dma_addr, size, dir); 2130 __unmap_single(domain->priv, dma_addr, size, dir);
2041 2131
2042 iommu_flush_complete(domain); 2132 domain_flush_complete(domain);
2043 2133
2044 spin_unlock_irqrestore(&domain->lock, flags); 2134 spin_unlock_irqrestore(&domain->lock, flags);
2045} 2135}
@@ -2104,7 +2194,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
2104 goto unmap; 2194 goto unmap;
2105 } 2195 }
2106 2196
2107 iommu_flush_complete(domain); 2197 domain_flush_complete(domain);
2108 2198
2109out: 2199out:
2110 spin_unlock_irqrestore(&domain->lock, flags); 2200 spin_unlock_irqrestore(&domain->lock, flags);
@@ -2150,7 +2240,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
2150 s->dma_address = s->dma_length = 0; 2240 s->dma_address = s->dma_length = 0;
2151 } 2241 }
2152 2242
2153 iommu_flush_complete(domain); 2243 domain_flush_complete(domain);
2154 2244
2155 spin_unlock_irqrestore(&domain->lock, flags); 2245 spin_unlock_irqrestore(&domain->lock, flags);
2156} 2246}
@@ -2200,7 +2290,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
2200 goto out_free; 2290 goto out_free;
2201 } 2291 }
2202 2292
2203 iommu_flush_complete(domain); 2293 domain_flush_complete(domain);
2204 2294
2205 spin_unlock_irqrestore(&domain->lock, flags); 2295 spin_unlock_irqrestore(&domain->lock, flags);
2206 2296
@@ -2232,7 +2322,7 @@ static void free_coherent(struct device *dev, size_t size,
2232 2322
2233 __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); 2323 __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
2234 2324
2235 iommu_flush_complete(domain); 2325 domain_flush_complete(domain);
2236 2326
2237 spin_unlock_irqrestore(&domain->lock, flags); 2327 spin_unlock_irqrestore(&domain->lock, flags);
2238 2328
@@ -2476,7 +2566,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom,
2476 if (!iommu) 2566 if (!iommu)
2477 return; 2567 return;
2478 2568
2479 iommu_flush_device(dev); 2569 device_flush_dte(dev);
2480 iommu_completion_wait(iommu); 2570 iommu_completion_wait(iommu);
2481} 2571}
2482 2572
@@ -2542,7 +2632,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
2542 unmap_size = iommu_unmap_page(domain, iova, page_size); 2632 unmap_size = iommu_unmap_page(domain, iova, page_size);
2543 mutex_unlock(&domain->api_lock); 2633 mutex_unlock(&domain->api_lock);
2544 2634
2545 iommu_flush_tlb_pde(domain); 2635 domain_flush_tlb_pde(domain);
2546 2636
2547 return get_order(unmap_size); 2637 return get_order(unmap_size);
2548} 2638}
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 246d727b65b7..9179c21120a8 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -137,6 +137,7 @@ int amd_iommus_present;
137 137
138/* IOMMUs have a non-present cache? */ 138/* IOMMUs have a non-present cache? */
139bool amd_iommu_np_cache __read_mostly; 139bool amd_iommu_np_cache __read_mostly;
140bool amd_iommu_iotlb_sup __read_mostly = true;
140 141
141/* 142/*
142 * The ACPI table parsing functions set this variable on an error 143 * The ACPI table parsing functions set this variable on an error
@@ -180,6 +181,12 @@ static u32 dev_table_size; /* size of the device table */
180static u32 alias_table_size; /* size of the alias table */ 181static u32 alias_table_size; /* size of the alias table */
181static u32 rlookup_table_size; /* size if the rlookup table */ 182static u32 rlookup_table_size; /* size if the rlookup table */
182 183
184/*
185 * This function flushes all internal caches of
186 * the IOMMU used by this driver.
187 */
188extern void iommu_flush_all_caches(struct amd_iommu *iommu);
189
183static inline void update_last_devid(u16 devid) 190static inline void update_last_devid(u16 devid)
184{ 191{
185 if (devid > amd_iommu_last_bdf) 192 if (devid > amd_iommu_last_bdf)
@@ -293,9 +300,23 @@ static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
293/* Function to enable the hardware */ 300/* Function to enable the hardware */
294static void iommu_enable(struct amd_iommu *iommu) 301static void iommu_enable(struct amd_iommu *iommu)
295{ 302{
296 printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n", 303 static const char * const feat_str[] = {
304 "PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
305 "IA", "GA", "HE", "PC", NULL
306 };
307 int i;
308
309 printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx",
297 dev_name(&iommu->dev->dev), iommu->cap_ptr); 310 dev_name(&iommu->dev->dev), iommu->cap_ptr);
298 311
312 if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
313 printk(KERN_CONT " extended features: ");
314 for (i = 0; feat_str[i]; ++i)
315 if (iommu_feature(iommu, (1ULL << i)))
316 printk(KERN_CONT " %s", feat_str[i]);
317 }
318 printk(KERN_CONT "\n");
319
299 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 320 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
300} 321}
301 322
@@ -651,7 +672,7 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
651static void __init init_iommu_from_pci(struct amd_iommu *iommu) 672static void __init init_iommu_from_pci(struct amd_iommu *iommu)
652{ 673{
653 int cap_ptr = iommu->cap_ptr; 674 int cap_ptr = iommu->cap_ptr;
654 u32 range, misc; 675 u32 range, misc, low, high;
655 int i, j; 676 int i, j;
656 677
657 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, 678 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
@@ -667,6 +688,15 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
667 MMIO_GET_LD(range)); 688 MMIO_GET_LD(range));
668 iommu->evt_msi_num = MMIO_MSI_NUM(misc); 689 iommu->evt_msi_num = MMIO_MSI_NUM(misc);
669 690
691 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
692 amd_iommu_iotlb_sup = false;
693
694 /* read extended feature bits */
695 low = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
696 high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
697
698 iommu->features = ((u64)high << 32) | low;
699
670 if (!is_rd890_iommu(iommu->dev)) 700 if (!is_rd890_iommu(iommu->dev))
671 return; 701 return;
672 702
@@ -1004,10 +1034,11 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
1004 if (pci_enable_msi(iommu->dev)) 1034 if (pci_enable_msi(iommu->dev))
1005 return 1; 1035 return 1;
1006 1036
1007 r = request_irq(iommu->dev->irq, amd_iommu_int_handler, 1037 r = request_threaded_irq(iommu->dev->irq,
1008 IRQF_SAMPLE_RANDOM, 1038 amd_iommu_int_handler,
1009 "AMD-Vi", 1039 amd_iommu_int_thread,
1010 NULL); 1040 0, "AMD-Vi",
1041 iommu->dev);
1011 1042
1012 if (r) { 1043 if (r) {
1013 pci_disable_msi(iommu->dev); 1044 pci_disable_msi(iommu->dev);
@@ -1244,6 +1275,7 @@ static void enable_iommus(void)
1244 iommu_set_exclusion_range(iommu); 1275 iommu_set_exclusion_range(iommu);
1245 iommu_init_msi(iommu); 1276 iommu_init_msi(iommu);
1246 iommu_enable(iommu); 1277 iommu_enable(iommu);
1278 iommu_flush_all_caches(iommu);
1247 } 1279 }
1248} 1280}
1249 1281
@@ -1274,8 +1306,8 @@ static void amd_iommu_resume(void)
1274 * we have to flush after the IOMMUs are enabled because a 1306 * we have to flush after the IOMMUs are enabled because a
1275 * disabled IOMMU will never execute the commands we send 1307 * disabled IOMMU will never execute the commands we send
1276 */ 1308 */
1277 amd_iommu_flush_all_devices(); 1309 for_each_iommu(iommu)
1278 amd_iommu_flush_all_domains(); 1310 iommu_flush_all_caches(iommu);
1279} 1311}
1280 1312
1281static int amd_iommu_suspend(void) 1313static int amd_iommu_suspend(void)
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
index 55d745ec1181..35ccf75696eb 100644
--- a/arch/x86/kernel/pci-iommu_table.c
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -50,20 +50,14 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
50 struct iommu_table_entry *finish) 50 struct iommu_table_entry *finish)
51{ 51{
52 struct iommu_table_entry *p, *q, *x; 52 struct iommu_table_entry *p, *q, *x;
53 char sym_p[KSYM_SYMBOL_LEN];
54 char sym_q[KSYM_SYMBOL_LEN];
55 53
56 /* Simple cyclic dependency checker. */ 54 /* Simple cyclic dependency checker. */
57 for (p = start; p < finish; p++) { 55 for (p = start; p < finish; p++) {
58 q = find_dependents_of(start, finish, p); 56 q = find_dependents_of(start, finish, p);
59 x = find_dependents_of(start, finish, q); 57 x = find_dependents_of(start, finish, q);
60 if (p == x) { 58 if (p == x) {
61 sprint_symbol(sym_p, (unsigned long)p->detect); 59 printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n",
62 sprint_symbol(sym_q, (unsigned long)q->detect); 60 p->detect, q->detect);
63
64 printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \
65 " on %s and vice-versa. BREAKING IT.\n",
66 sym_p, sym_q);
67 /* Heavy handed way..*/ 61 /* Heavy handed way..*/
68 x->depend = 0; 62 x->depend = 0;
69 } 63 }
@@ -72,12 +66,8 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
72 for (p = start; p < finish; p++) { 66 for (p = start; p < finish; p++) {
73 q = find_dependents_of(p, finish, p); 67 q = find_dependents_of(p, finish, p);
74 if (q && q > p) { 68 if (q && q > p) {
75 sprint_symbol(sym_p, (unsigned long)p->detect); 69 printk(KERN_ERR "EXECUTION ORDER INVALID! %pS should be called before %pS!\n",
76 sprint_symbol(sym_q, (unsigned long)q->detect); 70 p->detect, q->detect);
77
78 printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\
79 "should be called before %s!\n",
80 sym_p, sym_q);
81 } 71 }
82 } 72 }
83} 73}
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index d552d2c77844..6af6b628175b 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -39,6 +39,7 @@
39#include <linux/syscore_ops.h> 39#include <linux/syscore_ops.h>
40#include <linux/tboot.h> 40#include <linux/tboot.h>
41#include <linux/dmi.h> 41#include <linux/dmi.h>
42#include <linux/pci-ats.h>
42#include <asm/cacheflush.h> 43#include <asm/cacheflush.h>
43#include <asm/iommu.h> 44#include <asm/iommu.h>
44#include "pci.h" 45#include "pci.h"
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 553d8ee55c1c..42fae4776515 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -13,6 +13,7 @@
13#include <linux/mutex.h> 13#include <linux/mutex.h>
14#include <linux/string.h> 14#include <linux/string.h>
15#include <linux/delay.h> 15#include <linux/delay.h>
16#include <linux/pci-ats.h>
16#include "pci.h" 17#include "pci.h"
17 18
18#define VIRTFN_ID_LEN 16 19#define VIRTFN_ID_LEN 16
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index a6ec200fe5ee..4020025f854e 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -250,15 +250,6 @@ struct pci_sriov {
250 u8 __iomem *mstate; /* VF Migration State Array */ 250 u8 __iomem *mstate; /* VF Migration State Array */
251}; 251};
252 252
253/* Address Translation Service */
254struct pci_ats {
255 int pos; /* capability position */
256 int stu; /* Smallest Translation Unit */
257 int qdep; /* Invalidate Queue Depth */
258 int ref_cnt; /* Physical Function reference count */
259 unsigned int is_enabled:1; /* Enable bit is set */
260};
261
262#ifdef CONFIG_PCI_IOV 253#ifdef CONFIG_PCI_IOV
263extern int pci_iov_init(struct pci_dev *dev); 254extern int pci_iov_init(struct pci_dev *dev);
264extern void pci_iov_release(struct pci_dev *dev); 255extern void pci_iov_release(struct pci_dev *dev);
@@ -269,19 +260,6 @@ extern resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev,
269extern void pci_restore_iov_state(struct pci_dev *dev); 260extern void pci_restore_iov_state(struct pci_dev *dev);
270extern int pci_iov_bus_range(struct pci_bus *bus); 261extern int pci_iov_bus_range(struct pci_bus *bus);
271 262
272extern int pci_enable_ats(struct pci_dev *dev, int ps);
273extern void pci_disable_ats(struct pci_dev *dev);
274extern int pci_ats_queue_depth(struct pci_dev *dev);
275/**
276 * pci_ats_enabled - query the ATS status
277 * @dev: the PCI device
278 *
279 * Returns 1 if ATS capability is enabled, or 0 if not.
280 */
281static inline int pci_ats_enabled(struct pci_dev *dev)
282{
283 return dev->ats && dev->ats->is_enabled;
284}
285#else 263#else
286static inline int pci_iov_init(struct pci_dev *dev) 264static inline int pci_iov_init(struct pci_dev *dev)
287{ 265{
@@ -304,21 +282,6 @@ static inline int pci_iov_bus_range(struct pci_bus *bus)
304 return 0; 282 return 0;
305} 283}
306 284
307static inline int pci_enable_ats(struct pci_dev *dev, int ps)
308{
309 return -ENODEV;
310}
311static inline void pci_disable_ats(struct pci_dev *dev)
312{
313}
314static inline int pci_ats_queue_depth(struct pci_dev *dev)
315{
316 return -ENODEV;
317}
318static inline int pci_ats_enabled(struct pci_dev *dev)
319{
320 return 0;
321}
322#endif /* CONFIG_PCI_IOV */ 285#endif /* CONFIG_PCI_IOV */
323 286
324static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, 287static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
new file mode 100644
index 000000000000..655824fa4c76
--- /dev/null
+++ b/include/linux/pci-ats.h
@@ -0,0 +1,52 @@
1#ifndef LINUX_PCI_ATS_H
2#define LINUX_PCI_ATS_H
3
4/* Address Translation Service */
5struct pci_ats {
6 int pos; /* capability position */
7 int stu; /* Smallest Translation Unit */
8 int qdep; /* Invalidate Queue Depth */
9 int ref_cnt; /* Physical Function reference count */
10 unsigned int is_enabled:1; /* Enable bit is set */
11};
12
13#ifdef CONFIG_PCI_IOV
14
15extern int pci_enable_ats(struct pci_dev *dev, int ps);
16extern void pci_disable_ats(struct pci_dev *dev);
17extern int pci_ats_queue_depth(struct pci_dev *dev);
18/**
19 * pci_ats_enabled - query the ATS status
20 * @dev: the PCI device
21 *
22 * Returns 1 if ATS capability is enabled, or 0 if not.
23 */
24static inline int pci_ats_enabled(struct pci_dev *dev)
25{
26 return dev->ats && dev->ats->is_enabled;
27}
28
29#else /* CONFIG_PCI_IOV */
30
31static inline int pci_enable_ats(struct pci_dev *dev, int ps)
32{
33 return -ENODEV;
34}
35
36static inline void pci_disable_ats(struct pci_dev *dev)
37{
38}
39
40static inline int pci_ats_queue_depth(struct pci_dev *dev)
41{
42 return -ENODEV;
43}
44
45static inline int pci_ats_enabled(struct pci_dev *dev)
46{
47 return 0;
48}
49
50#endif /* CONFIG_PCI_IOV */
51
52#endif /* LINUX_PCI_ATS_H*/
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 4bfb0471f106..db07bfd9298e 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -649,7 +649,7 @@ out_err:
649 return -ENOMEM; 649 return -ENOMEM;
650} 650}
651 651
652static int device_dma_allocations(struct device *dev) 652static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry)
653{ 653{
654 struct dma_debug_entry *entry; 654 struct dma_debug_entry *entry;
655 unsigned long flags; 655 unsigned long flags;
@@ -660,8 +660,10 @@ static int device_dma_allocations(struct device *dev)
660 for (i = 0; i < HASH_SIZE; ++i) { 660 for (i = 0; i < HASH_SIZE; ++i) {
661 spin_lock(&dma_entry_hash[i].lock); 661 spin_lock(&dma_entry_hash[i].lock);
662 list_for_each_entry(entry, &dma_entry_hash[i].list, list) { 662 list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
663 if (entry->dev == dev) 663 if (entry->dev == dev) {
664 count += 1; 664 count += 1;
665 *out_entry = entry;
666 }
665 } 667 }
666 spin_unlock(&dma_entry_hash[i].lock); 668 spin_unlock(&dma_entry_hash[i].lock);
667 } 669 }
@@ -674,6 +676,7 @@ static int device_dma_allocations(struct device *dev)
674static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data) 676static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data)
675{ 677{
676 struct device *dev = data; 678 struct device *dev = data;
679 struct dma_debug_entry *uninitialized_var(entry);
677 int count; 680 int count;
678 681
679 if (global_disable) 682 if (global_disable)
@@ -681,12 +684,17 @@ static int dma_debug_device_change(struct notifier_block *nb, unsigned long acti
681 684
682 switch (action) { 685 switch (action) {
683 case BUS_NOTIFY_UNBOUND_DRIVER: 686 case BUS_NOTIFY_UNBOUND_DRIVER:
684 count = device_dma_allocations(dev); 687 count = device_dma_allocations(dev, &entry);
685 if (count == 0) 688 if (count == 0)
686 break; 689 break;
687 err_printk(dev, NULL, "DMA-API: device driver has pending " 690 err_printk(dev, entry, "DMA-API: device driver has pending "
688 "DMA allocations while released from device " 691 "DMA allocations while released from device "
689 "[count=%d]\n", count); 692 "[count=%d]\n"
693 "One of leaked entries details: "
694 "[device address=0x%016llx] [size=%llu bytes] "
695 "[mapped with %s] [mapped as %s]\n",
696 count, entry->dev_addr, entry->size,
697 dir2name[entry->direction], type2name[entry->type]);
690 break; 698 break;
691 default: 699 default:
692 break; 700 break;