path: root/drivers
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-01-10 14:08:21 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-01-10 14:08:21 -0500
commit	1c8106528aa6bf16b3f457de80df1cf7462a49a4 (patch)
tree	4aed009c4a36195fd14c9f8d70fe2723a49583da /drivers
parent	1a464cbb3d483f2f195b614cffa4aa1b910a0440 (diff)
parent	f93ea733878733f3e98475bc3e2ccf789bebcfb8 (diff)
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (53 commits)
  iommu/amd: Set IOTLB invalidation timeout
  iommu/amd: Init stats for iommu=pt
  iommu/amd: Remove unnecessary cache flushes in amd_iommu_resume
  iommu/amd: Add invalidate-context call-back
  iommu/amd: Add amd_iommu_device_info() function
  iommu/amd: Adapt IOMMU driver to PCI register name changes
  iommu/amd: Add invalid_ppr callback
  iommu/amd: Implement notifiers for IOMMUv2
  iommu/amd: Implement IO page-fault handler
  iommu/amd: Add routines to bind/unbind a pasid
  iommu/amd: Implement device aquisition code for IOMMUv2
  iommu/amd: Add driver stub for AMD IOMMUv2 support
  iommu/amd: Add stat counter for IOMMUv2 events
  iommu/amd: Add device errata handling
  iommu/amd: Add function to get IOMMUv2 domain for pdev
  iommu/amd: Implement function to send PPR completions
  iommu/amd: Implement functions to manage GCR3 table
  iommu/amd: Implement IOMMUv2 TLB flushing routines
  iommu/amd: Add support for IOMMUv2 domain mode
  iommu/amd: Add amd_iommu_domain_direct_map function
  ...
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/acpi/pci_root.c                 |   7
-rw-r--r--  drivers/iommu/Kconfig                   |  13
-rw-r--r--  drivers/iommu/Makefile                  |   1
-rw-r--r--  drivers/iommu/amd_iommu.c               | 883
-rw-r--r--  drivers/iommu/amd_iommu_init.c          | 133
-rw-r--r--  drivers/iommu/amd_iommu_proto.h         |  24
-rw-r--r--  drivers/iommu/amd_iommu_types.h         | 118
-rw-r--r--  drivers/iommu/amd_iommu_v2.c            | 994
-rw-r--r--  drivers/iommu/intel-iommu.c             |  79
-rw-r--r--  drivers/iommu/iommu.c                   | 177
-rw-r--r--  drivers/iommu/msm_iommu.c               |  25
-rw-r--r--  drivers/iommu/omap-iommu.c              |  80
-rw-r--r--  drivers/iommu/omap-iovmm.c              |  48
-rw-r--r--  drivers/media/video/omap3isp/isp.c      |  30
-rw-r--r--  drivers/media/video/omap3isp/isp.h      |   2
-rw-r--r--  drivers/media/video/omap3isp/ispccdc.c  |  18
-rw-r--r--  drivers/media/video/omap3isp/ispstat.c  |   8
-rw-r--r--  drivers/media/video/omap3isp/ispvideo.c |   4
-rw-r--r--  drivers/pci/ats.c                       |  90
-rw-r--r--  drivers/pci/hotplug/pciehp.h            |   1
-rw-r--r--  drivers/pci/hotplug/pciehp_core.c       |  11
-rw-r--r--  drivers/pci/hotplug/pciehp_ctrl.c       |   4
-rw-r--r--  drivers/pci/hotplug/pciehp_hpc.c        |   1
-rw-r--r--  drivers/pci/msi.c                       | 121
-rw-r--r--  drivers/pci/pci-acpi.c                  |  13
-rw-r--r--  drivers/pci/pcie/aspm.c                 |  58
26 files changed, 2665 insertions(+), 278 deletions(-)
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 2672c798272..7aff6312ce7 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -596,6 +596,13 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	if (ACPI_SUCCESS(status)) {
 		dev_info(root->bus->bridge,
 			"ACPI _OSC control (0x%02x) granted\n", flags);
+		if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
+			/*
+			 * We have ASPM control, but the FADT indicates
+			 * that it's unsupported. Clear it.
+			 */
+			pcie_clear_aspm(root->bus);
+		}
 	} else {
 		dev_info(root->bus->bridge,
 			"ACPI _OSC request failed (%s), "
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 5414253b185..6bea6962f8e 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -34,7 +34,9 @@ config AMD_IOMMU
 	bool "AMD IOMMU support"
 	select SWIOTLB
 	select PCI_MSI
-	select PCI_IOV
+	select PCI_ATS
+	select PCI_PRI
+	select PCI_PASID
 	select IOMMU_API
 	depends on X86_64 && PCI && ACPI
 	---help---
@@ -58,6 +60,15 @@ config AMD_IOMMU_STATS
 	  information to userspace via debugfs.
 	  If unsure, say N.
 
+config AMD_IOMMU_V2
+	tristate "AMD IOMMU Version 2 driver (EXPERIMENTAL)"
+	depends on AMD_IOMMU && PROFILING && EXPERIMENTAL
+	select MMU_NOTIFIER
+	---help---
+	  This option enables support for the AMD IOMMUv2 features of the IOMMU
+	  hardware. Select this option if you want to use devices that support
+	  the the PCI PRI and PASID interface.
+
 # Intel IOMMU support
 config DMAR_TABLE
 	bool
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 2f4448794bc..0e36b4934af 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_IOMMU_API) += iommu.o
 obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
 obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
+obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
 obj-$(CONFIG_DMAR_TABLE) += dmar.o
 obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o
 obj-$(CONFIG_IRQ_REMAP) += intr_remapping.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 4ee277a8521..cce1f03b889 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -17,6 +17,7 @@
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
20#include <linux/ratelimit.h>
20#include <linux/pci.h> 21#include <linux/pci.h>
21#include <linux/pci-ats.h> 22#include <linux/pci-ats.h>
22#include <linux/bitmap.h> 23#include <linux/bitmap.h>
@@ -28,6 +29,8 @@
28#include <linux/iommu.h> 29#include <linux/iommu.h>
29#include <linux/delay.h> 30#include <linux/delay.h>
30#include <linux/amd-iommu.h> 31#include <linux/amd-iommu.h>
32#include <linux/notifier.h>
33#include <linux/export.h>
31#include <asm/msidef.h> 34#include <asm/msidef.h>
32#include <asm/proto.h> 35#include <asm/proto.h>
33#include <asm/iommu.h> 36#include <asm/iommu.h>
@@ -41,6 +44,24 @@
41 44
42#define LOOP_TIMEOUT 100000 45#define LOOP_TIMEOUT 100000
43 46
47/*
48 * This bitmap is used to advertise the page sizes our hardware support
49 * to the IOMMU core, which will then use this information to split
50 * physically contiguous memory regions it is mapping into page sizes
51 * that we support.
52 *
53 * Traditionally the IOMMU core just handed us the mappings directly,
54 * after making sure the size is an order of a 4KiB page and that the
55 * mapping has natural alignment.
56 *
57 * To retain this behavior, we currently advertise that we support
58 * all page sizes that are an order of 4KiB.
59 *
60 * If at some point we'd like to utilize the IOMMU core's new behavior,
61 * we could change this to advertise the real page sizes we support.
62 */
63#define AMD_IOMMU_PGSIZES (~0xFFFUL)
64
44static DEFINE_RWLOCK(amd_iommu_devtable_lock); 65static DEFINE_RWLOCK(amd_iommu_devtable_lock);
45 66
46/* A list of preallocated protection domains */ 67/* A list of preallocated protection domains */
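
The AMD_IOMMU_PGSIZES comment above explains that advertising every power of two from 4KiB upward lets the IOMMU core keep handing the driver naturally aligned, order-of-4KiB mappings. A minimal user-space sketch of that split logic follows; it is illustrative only (pick_pgsize and the sample values are not from the patch or the IOMMU core), but it shows how a caller can use such a bitmap to choose the largest chunk that fits a given iova/length:

    #include <stdint.h>
    #include <stddef.h>

    /* Largest advertised page size that is aligned to iova and <= len. */
    static size_t pick_pgsize(unsigned long pgsize_bitmap, uint64_t iova, size_t len)
    {
    	unsigned int bit;
    	size_t pgsize = 0;

    	for (bit = 12; bit < 64; bit++) {	/* 4KiB and up */
    		size_t sz = (size_t)1 << bit;

    		if (!(pgsize_bitmap & sz))
    			continue;		/* size not advertised */
    		if (iova & (sz - 1))
    			break;			/* iova not aligned to sz */
    		if (sz > len)
    			break;			/* chunk would overshoot */
    		pgsize = sz;			/* best fit so far */
    	}
    	return pgsize;
    }

For example, pick_pgsize(~0xFFFUL, 0x200000, 0x400000) returns 2MiB: 4MiB is advertised but the iova is only 2MiB-aligned.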
@@ -59,6 +80,9 @@ static struct protection_domain *pt_domain;
59 80
60static struct iommu_ops amd_iommu_ops; 81static struct iommu_ops amd_iommu_ops;
61 82
83static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
84int amd_iommu_max_glx_val = -1;
85
62/* 86/*
63 * general struct to manage commands send to an IOMMU 87 * general struct to manage commands send to an IOMMU
64 */ 88 */
@@ -67,6 +91,7 @@ struct iommu_cmd {
67}; 91};
68 92
69static void update_domain(struct protection_domain *domain); 93static void update_domain(struct protection_domain *domain);
94static int __init alloc_passthrough_domain(void);
70 95
71/**************************************************************************** 96/****************************************************************************
72 * 97 *
@@ -147,6 +172,33 @@ static struct iommu_dev_data *get_dev_data(struct device *dev)
147 return dev->archdata.iommu; 172 return dev->archdata.iommu;
148} 173}
149 174
175static bool pci_iommuv2_capable(struct pci_dev *pdev)
176{
177 static const int caps[] = {
178 PCI_EXT_CAP_ID_ATS,
179 PCI_EXT_CAP_ID_PRI,
180 PCI_EXT_CAP_ID_PASID,
181 };
182 int i, pos;
183
184 for (i = 0; i < 3; ++i) {
185 pos = pci_find_ext_capability(pdev, caps[i]);
186 if (pos == 0)
187 return false;
188 }
189
190 return true;
191}
192
193static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
194{
195 struct iommu_dev_data *dev_data;
196
197 dev_data = get_dev_data(&pdev->dev);
198
199 return dev_data->errata & (1 << erratum) ? true : false;
200}
201
150/* 202/*
151 * In this function the list of preallocated protection domains is traversed to 203 * In this function the list of preallocated protection domains is traversed to
152 * find the domain for a specific device 204 * find the domain for a specific device
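
As a side note on pci_iommuv2_capable() above: the device qualifies only if all three of the ATS, PRI and PASID extended capabilities are present, since pci_find_ext_capability() returns 0 for a missing capability. A slightly different, illustrative phrasing of the same probe is sketched below (not the patch's code; it merely swaps the hard-coded loop bound of 3 for ARRAY_SIZE and assumes the usual <linux/pci.h> / <linux/kernel.h> context):

    static bool pci_iommuv2_capable_example(struct pci_dev *pdev)
    {
    	static const int caps[] = {
    		PCI_EXT_CAP_ID_ATS,
    		PCI_EXT_CAP_ID_PRI,
    		PCI_EXT_CAP_ID_PASID,
    	};
    	int i;

    	for (i = 0; i < ARRAY_SIZE(caps); ++i) {
    		/* any missing capability rules out IOMMUv2 use */
    		if (pci_find_ext_capability(pdev, caps[i]) == 0)
    			return false;
    	}

    	return true;
    }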
@@ -204,6 +256,7 @@ static bool check_device(struct device *dev)
204 256
205static int iommu_init_device(struct device *dev) 257static int iommu_init_device(struct device *dev)
206{ 258{
259 struct pci_dev *pdev = to_pci_dev(dev);
207 struct iommu_dev_data *dev_data; 260 struct iommu_dev_data *dev_data;
208 u16 alias; 261 u16 alias;
209 262
@@ -228,6 +281,13 @@ static int iommu_init_device(struct device *dev)
228 dev_data->alias_data = alias_data; 281 dev_data->alias_data = alias_data;
229 } 282 }
230 283
284 if (pci_iommuv2_capable(pdev)) {
285 struct amd_iommu *iommu;
286
287 iommu = amd_iommu_rlookup_table[dev_data->devid];
288 dev_data->iommu_v2 = iommu->is_iommu_v2;
289 }
290
231 dev->archdata.iommu = dev_data; 291 dev->archdata.iommu = dev_data;
232 292
233 return 0; 293 return 0;
@@ -317,6 +377,11 @@ DECLARE_STATS_COUNTER(domain_flush_single);
317DECLARE_STATS_COUNTER(domain_flush_all); 377DECLARE_STATS_COUNTER(domain_flush_all);
318DECLARE_STATS_COUNTER(alloced_io_mem); 378DECLARE_STATS_COUNTER(alloced_io_mem);
319DECLARE_STATS_COUNTER(total_map_requests); 379DECLARE_STATS_COUNTER(total_map_requests);
380DECLARE_STATS_COUNTER(complete_ppr);
381DECLARE_STATS_COUNTER(invalidate_iotlb);
382DECLARE_STATS_COUNTER(invalidate_iotlb_all);
383DECLARE_STATS_COUNTER(pri_requests);
384
320 385
321static struct dentry *stats_dir; 386static struct dentry *stats_dir;
322static struct dentry *de_fflush; 387static struct dentry *de_fflush;
@@ -351,6 +416,10 @@ static void amd_iommu_stats_init(void)
351 amd_iommu_stats_add(&domain_flush_all); 416 amd_iommu_stats_add(&domain_flush_all);
352 amd_iommu_stats_add(&alloced_io_mem); 417 amd_iommu_stats_add(&alloced_io_mem);
353 amd_iommu_stats_add(&total_map_requests); 418 amd_iommu_stats_add(&total_map_requests);
419 amd_iommu_stats_add(&complete_ppr);
420 amd_iommu_stats_add(&invalidate_iotlb);
421 amd_iommu_stats_add(&invalidate_iotlb_all);
422 amd_iommu_stats_add(&pri_requests);
354} 423}
355 424
356#endif 425#endif
@@ -365,8 +434,8 @@ static void dump_dte_entry(u16 devid)
 {
 	int i;
 
-	for (i = 0; i < 8; ++i)
-		pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
+	for (i = 0; i < 4; ++i)
+		pr_err("AMD-Vi: DTE[%d]: %016llx\n", i,
 			amd_iommu_dev_table[devid].data[i]);
 }
 
@@ -461,12 +530,84 @@ static void iommu_poll_events(struct amd_iommu *iommu)
461 spin_unlock_irqrestore(&iommu->lock, flags); 530 spin_unlock_irqrestore(&iommu->lock, flags);
462} 531}
463 532
533static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head)
534{
535 struct amd_iommu_fault fault;
536 volatile u64 *raw;
537 int i;
538
539 INC_STATS_COUNTER(pri_requests);
540
541 raw = (u64 *)(iommu->ppr_log + head);
542
543 /*
544 * Hardware bug: Interrupt may arrive before the entry is written to
545 * memory. If this happens we need to wait for the entry to arrive.
546 */
547 for (i = 0; i < LOOP_TIMEOUT; ++i) {
548 if (PPR_REQ_TYPE(raw[0]) != 0)
549 break;
550 udelay(1);
551 }
552
553 if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
554 pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n");
555 return;
556 }
557
558 fault.address = raw[1];
559 fault.pasid = PPR_PASID(raw[0]);
560 fault.device_id = PPR_DEVID(raw[0]);
561 fault.tag = PPR_TAG(raw[0]);
562 fault.flags = PPR_FLAGS(raw[0]);
563
564 /*
565 * To detect the hardware bug we need to clear the entry
566 * to back to zero.
567 */
568 raw[0] = raw[1] = 0;
569
570 atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
571}
572
573static void iommu_poll_ppr_log(struct amd_iommu *iommu)
574{
575 unsigned long flags;
576 u32 head, tail;
577
578 if (iommu->ppr_log == NULL)
579 return;
580
581 spin_lock_irqsave(&iommu->lock, flags);
582
583 head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
584 tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
585
586 while (head != tail) {
587
588 /* Handle PPR entry */
589 iommu_handle_ppr_entry(iommu, head);
590
591 /* Update and refresh ring-buffer state*/
592 head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
593 writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
594 tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
595 }
596
597 /* enable ppr interrupts again */
598 writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
599
600 spin_unlock_irqrestore(&iommu->lock, flags);
601}
602
464irqreturn_t amd_iommu_int_thread(int irq, void *data) 603irqreturn_t amd_iommu_int_thread(int irq, void *data)
465{ 604{
466 struct amd_iommu *iommu; 605 struct amd_iommu *iommu;
467 606
468 for_each_iommu(iommu) 607 for_each_iommu(iommu) {
469 iommu_poll_events(iommu); 608 iommu_poll_events(iommu);
609 iommu_poll_ppr_log(iommu);
610 }
470 611
471 return IRQ_HANDLED; 612 return IRQ_HANDLED;
472} 613}
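
The PPR log added above is a plain ring buffer: 512 entries of 16 bytes, with the driver advancing the head pointer modulo the log size while the hardware advances the tail. A small stand-alone sketch of that arithmetic (illustrative only, reusing the constants this merge adds to amd_iommu_types.h; the sample head/tail values are made up):

    #include <stdint.h>
    #include <stdio.h>

    #define PPR_ENTRY_SIZE	16
    #define PPR_LOG_ENTRIES	512
    #define PPR_LOG_SIZE	(PPR_ENTRY_SIZE * PPR_LOG_ENTRIES)	/* 8KiB */

    int main(void)
    {
    	uint32_t head = PPR_LOG_SIZE - PPR_ENTRY_SIZE;	/* consumer at last entry */
    	uint32_t tail = PPR_ENTRY_SIZE;			/* hardware wrote two more */
    	unsigned int handled = 0;

    	while (head != tail) {
    		/* a real consumer would decode the entry at 'head' here */
    		head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;	/* wraps to 0 */
    		handled++;
    	}
    	printf("handled %u entries, head=%u tail=%u\n", handled, head, tail);
    	return 0;
    }

This prints "handled 2 entries", showing how the modulo wrap lets the head cross the end of the 8KiB log without special casing.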
@@ -595,6 +736,60 @@ static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
595 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 736 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
596} 737}
597 738
739static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
740 u64 address, bool size)
741{
742 memset(cmd, 0, sizeof(*cmd));
743
744 address &= ~(0xfffULL);
745
746 cmd->data[0] = pasid & PASID_MASK;
747 cmd->data[1] = domid;
748 cmd->data[2] = lower_32_bits(address);
749 cmd->data[3] = upper_32_bits(address);
750 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
751 cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
752 if (size)
753 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
754 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
755}
756
757static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
758 int qdep, u64 address, bool size)
759{
760 memset(cmd, 0, sizeof(*cmd));
761
762 address &= ~(0xfffULL);
763
764 cmd->data[0] = devid;
765 cmd->data[0] |= (pasid & 0xff) << 16;
766 cmd->data[0] |= (qdep & 0xff) << 24;
767 cmd->data[1] = devid;
768 cmd->data[1] |= ((pasid >> 8) & 0xfff) << 16;
769 cmd->data[2] = lower_32_bits(address);
770 cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
771 cmd->data[3] = upper_32_bits(address);
772 if (size)
773 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
774 CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
775}
776
777static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,
778 int status, int tag, bool gn)
779{
780 memset(cmd, 0, sizeof(*cmd));
781
782 cmd->data[0] = devid;
783 if (gn) {
784 cmd->data[1] = pasid & PASID_MASK;
785 cmd->data[2] = CMD_INV_IOMMU_PAGES_GN_MASK;
786 }
787 cmd->data[3] = tag & 0x1ff;
788 cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT;
789
790 CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR);
791}
792
598static void build_inv_all(struct iommu_cmd *cmd) 793static void build_inv_all(struct iommu_cmd *cmd)
599{ 794{
600 memset(cmd, 0, sizeof(*cmd)); 795 memset(cmd, 0, sizeof(*cmd));
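
build_inv_iotlb_pasid() above scatters the 20-bit PASID across two command words: the low 8 bits sit at bits 16-23 of data[0] next to the queue depth, and the remaining 12 bits at bits 16-27 of data[1]. A worked example (illustrative, with made-up devid/qdep/PASID values, not taken from the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	uint32_t pasid = 0x12345;		/* example 20-bit PASID */
    	uint16_t devid = 0x0010;		/* example requester ID  */
    	uint8_t  qdep  = 32;			/* example ATS queue depth */
    	uint32_t data0, data1;

    	data0  = devid;
    	data0 |= (pasid & 0xff) << 16;		/* PASID[7:0]  */
    	data0 |= (qdep & 0xff) << 24;
    	data1  = devid;
    	data1 |= ((pasid >> 8) & 0xfff) << 16;	/* PASID[19:8] */

    	printf("data[0]=0x%08x data[1]=0x%08x\n", data0, data1);
    	/* -> data[0]=0x20450010 data[1]=0x01230010 */
    	return 0;
    }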
@@ -1496,6 +1691,48 @@ static void free_pagetable(struct protection_domain *domain)
1496 domain->pt_root = NULL; 1691 domain->pt_root = NULL;
1497} 1692}
1498 1693
1694static void free_gcr3_tbl_level1(u64 *tbl)
1695{
1696 u64 *ptr;
1697 int i;
1698
1699 for (i = 0; i < 512; ++i) {
1700 if (!(tbl[i] & GCR3_VALID))
1701 continue;
1702
1703 ptr = __va(tbl[i] & PAGE_MASK);
1704
1705 free_page((unsigned long)ptr);
1706 }
1707}
1708
1709static void free_gcr3_tbl_level2(u64 *tbl)
1710{
1711 u64 *ptr;
1712 int i;
1713
1714 for (i = 0; i < 512; ++i) {
1715 if (!(tbl[i] & GCR3_VALID))
1716 continue;
1717
1718 ptr = __va(tbl[i] & PAGE_MASK);
1719
1720 free_gcr3_tbl_level1(ptr);
1721 }
1722}
1723
1724static void free_gcr3_table(struct protection_domain *domain)
1725{
1726 if (domain->glx == 2)
1727 free_gcr3_tbl_level2(domain->gcr3_tbl);
1728 else if (domain->glx == 1)
1729 free_gcr3_tbl_level1(domain->gcr3_tbl);
1730 else if (domain->glx != 0)
1731 BUG();
1732
1733 free_page((unsigned long)domain->gcr3_tbl);
1734}
1735
1499/* 1736/*
1500 * Free a domain, only used if something went wrong in the 1737 * Free a domain, only used if something went wrong in the
1501 * allocation path and we need to free an already allocated page table 1738 * allocation path and we need to free an already allocated page table
@@ -1582,20 +1819,52 @@ static bool dma_ops_domain(struct protection_domain *domain)
 
 static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
 {
-	u64 pte_root = virt_to_phys(domain->pt_root);
-	u32 flags = 0;
+	u64 pte_root = 0;
+	u64 flags = 0;
+
+	if (domain->mode != PAGE_MODE_NONE)
+		pte_root = virt_to_phys(domain->pt_root);
 
 	pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
 		    << DEV_ENTRY_MODE_SHIFT;
 	pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
 
+	flags = amd_iommu_dev_table[devid].data[1];
+
 	if (ats)
 		flags |= DTE_FLAG_IOTLB;
 
-	amd_iommu_dev_table[devid].data[3] |= flags;
-	amd_iommu_dev_table[devid].data[2]  = domain->id;
-	amd_iommu_dev_table[devid].data[1]  = upper_32_bits(pte_root);
-	amd_iommu_dev_table[devid].data[0]  = lower_32_bits(pte_root);
+	if (domain->flags & PD_IOMMUV2_MASK) {
+		u64 gcr3 = __pa(domain->gcr3_tbl);
+		u64 glx  = domain->glx;
+		u64 tmp;
+
+		pte_root |= DTE_FLAG_GV;
+		pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT;
+
+		/* First mask out possible old values for GCR3 table */
+		tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
+		flags    &= ~tmp;
+
+		tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
+		flags    &= ~tmp;
+
+		/* Encode GCR3 table into DTE */
+		tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
+		pte_root |= tmp;
+
+		tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
+		flags    |= tmp;
+
+		tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
+		flags    |= tmp;
+	}
+
+	flags &= ~(0xffffUL);
+	flags |= domain->id;
+
+	amd_iommu_dev_table[devid].data[1]  = flags;
+	amd_iommu_dev_table[devid].data[0]  = pte_root;
 }
 
 static void clear_dte_entry(u16 devid)
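
The rewritten set_dte_entry() now treats data[1] as a 64-bit word that is read, partially cleared and rewritten: the low 16 bits carry the domain ID, everything above them (IOTLB flag, GCR3 fragments, ...) is preserved. A tiny arithmetic illustration with made-up register contents (not from the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	uint64_t flags = 0xabcd000000011234ULL;	/* pretend old data[1] contents */
    	uint16_t domain_id = 0x002a;

    	flags &= ~0xffffULL;			/* clear the old domain ID field */
    	flags |= domain_id;			/* insert the new one */

    	printf("data[1] = 0x%016llx\n", (unsigned long long)flags);
    	/* -> 0xabcd00000001002a: upper 48 bits untouched, low 16 now 0x002a */
    	return 0;
    }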
@@ -1603,7 +1872,6 @@ static void clear_dte_entry(u16 devid)
 	/* remove entry from the device table seen by the hardware */
 	amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
 	amd_iommu_dev_table[devid].data[1] = 0;
-	amd_iommu_dev_table[devid].data[2] = 0;
 
 	amd_iommu_apply_erratum_63(devid);
 }
@@ -1696,6 +1964,93 @@ out_unlock:
1696 return ret; 1964 return ret;
1697} 1965}
1698 1966
1967
1968static void pdev_iommuv2_disable(struct pci_dev *pdev)
1969{
1970 pci_disable_ats(pdev);
1971 pci_disable_pri(pdev);
1972 pci_disable_pasid(pdev);
1973}
1974
1975/* FIXME: Change generic reset-function to do the same */
1976static int pri_reset_while_enabled(struct pci_dev *pdev)
1977{
1978 u16 control;
1979 int pos;
1980
1981 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
1982 if (!pos)
1983 return -EINVAL;
1984
1985 pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
1986 control |= PCI_PRI_CTRL_RESET;
1987 pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
1988
1989 return 0;
1990}
1991
1992static int pdev_iommuv2_enable(struct pci_dev *pdev)
1993{
1994 bool reset_enable;
1995 int reqs, ret;
1996
1997 /* FIXME: Hardcode number of outstanding requests for now */
1998 reqs = 32;
1999 if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE))
2000 reqs = 1;
2001 reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET);
2002
2003 /* Only allow access to user-accessible pages */
2004 ret = pci_enable_pasid(pdev, 0);
2005 if (ret)
2006 goto out_err;
2007
2008 /* First reset the PRI state of the device */
2009 ret = pci_reset_pri(pdev);
2010 if (ret)
2011 goto out_err;
2012
2013 /* Enable PRI */
2014 ret = pci_enable_pri(pdev, reqs);
2015 if (ret)
2016 goto out_err;
2017
2018 if (reset_enable) {
2019 ret = pri_reset_while_enabled(pdev);
2020 if (ret)
2021 goto out_err;
2022 }
2023
2024 ret = pci_enable_ats(pdev, PAGE_SHIFT);
2025 if (ret)
2026 goto out_err;
2027
2028 return 0;
2029
2030out_err:
2031 pci_disable_pri(pdev);
2032 pci_disable_pasid(pdev);
2033
2034 return ret;
2035}
2036
2037/* FIXME: Move this to PCI code */
2038#define PCI_PRI_TLP_OFF (1 << 2)
2039
2040bool pci_pri_tlp_required(struct pci_dev *pdev)
2041{
2042 u16 control;
2043 int pos;
2044
2045 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
2046 if (!pos)
2047 return false;
2048
2049 pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
2050
2051 return (control & PCI_PRI_TLP_OFF) ? true : false;
2052}
2053
1699/* 2054/*
1700 * If a device is not yet associated with a domain, this function does 2055 * If a device is not yet associated with a domain, this function does
1701 * assigns it visible for the hardware 2056 * assigns it visible for the hardware
@@ -1710,7 +2065,18 @@ static int attach_device(struct device *dev,
 
 	dev_data = get_dev_data(dev);
 
-	if (amd_iommu_iotlb_sup && pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
+	if (domain->flags & PD_IOMMUV2_MASK) {
+		if (!dev_data->iommu_v2 || !dev_data->passthrough)
+			return -EINVAL;
+
+		if (pdev_iommuv2_enable(pdev) != 0)
+			return -EINVAL;
+
+		dev_data->ats.enabled = true;
+		dev_data->ats.qdep    = pci_ats_queue_depth(pdev);
+		dev_data->pri_tlp     = pci_pri_tlp_required(pdev);
+	} else if (amd_iommu_iotlb_sup &&
+		   pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
 		dev_data->ats.enabled = true;
 		dev_data->ats.qdep    = pci_ats_queue_depth(pdev);
 	}
@@ -1760,7 +2126,7 @@ static void __detach_device(struct iommu_dev_data *dev_data)
 	 * passthrough domain if it is detached from any other domain.
 	 * Make sure we can deassign from the pt_domain itself.
 	 */
-	if (iommu_pass_through &&
+	if (dev_data->passthrough &&
 	    (dev_data->domain == NULL && domain != pt_domain))
 		__attach_device(dev_data, pt_domain);
 }
@@ -1770,20 +2136,24 @@ static void __detach_device(struct iommu_dev_data *dev_data)
  */
 static void detach_device(struct device *dev)
 {
+	struct protection_domain *domain;
 	struct iommu_dev_data *dev_data;
 	unsigned long flags;
 
 	dev_data = get_dev_data(dev);
+	domain   = dev_data->domain;
 
 	/* lock device table */
 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
 	__detach_device(dev_data);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 
-	if (dev_data->ats.enabled) {
+	if (domain->flags & PD_IOMMUV2_MASK)
+		pdev_iommuv2_disable(to_pci_dev(dev));
+	else if (dev_data->ats.enabled)
 		pci_disable_ats(to_pci_dev(dev));
-		dev_data->ats.enabled = false;
-	}
+
+	dev_data->ats.enabled = false;
 }
 
 /*
@@ -1818,18 +2188,20 @@ static struct protection_domain *domain_for_device(struct device *dev)
 static int device_change_notifier(struct notifier_block *nb,
 				  unsigned long action, void *data)
 {
-	struct device *dev = data;
-	u16 devid;
-	struct protection_domain *domain;
 	struct dma_ops_domain *dma_domain;
+	struct protection_domain *domain;
+	struct iommu_dev_data *dev_data;
+	struct device *dev = data;
 	struct amd_iommu *iommu;
 	unsigned long flags;
+	u16 devid;
 
 	if (!check_device(dev))
 		return 0;
 
 	devid = get_device_id(dev);
 	iommu = amd_iommu_rlookup_table[devid];
+	dev_data = get_dev_data(dev);
 
 	switch (action) {
 	case BUS_NOTIFY_UNBOUND_DRIVER:
@@ -1838,7 +2210,7 @@ static int device_change_notifier(struct notifier_block *nb,
 
 		if (!domain)
 			goto out;
-		if (iommu_pass_through)
+		if (dev_data->passthrough)
 			break;
 		detach_device(dev);
 		break;
@@ -2434,8 +2806,9 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
  */
 static void prealloc_protection_domains(void)
 {
-	struct pci_dev *dev = NULL;
+	struct iommu_dev_data *dev_data;
 	struct dma_ops_domain *dma_dom;
+	struct pci_dev *dev = NULL;
 	u16 devid;
 
 	for_each_pci_dev(dev) {
@@ -2444,6 +2817,16 @@ static void prealloc_protection_domains(void)
 		if (!check_device(&dev->dev))
 			continue;
 
+		dev_data = get_dev_data(&dev->dev);
+		if (!amd_iommu_force_isolation && dev_data->iommu_v2) {
+			/* Make sure passthrough domain is allocated */
+			alloc_passthrough_domain();
+			dev_data->passthrough = true;
+			attach_device(&dev->dev, pt_domain);
+			pr_info("AMD-Vi: Using passthough domain for device %s\n",
+				dev_name(&dev->dev));
+		}
+
 		/* Is there already any domain for it? */
 		if (domain_for_device(&dev->dev))
 			continue;
@@ -2474,6 +2857,7 @@ static struct dma_map_ops amd_iommu_dma_ops = {
 
 static unsigned device_dma_ops_init(void)
 {
+	struct iommu_dev_data *dev_data;
 	struct pci_dev *pdev = NULL;
 	unsigned unhandled = 0;
 
@@ -2483,7 +2867,12 @@ static unsigned device_dma_ops_init(void)
 			continue;
 		}
 
-		pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
+		dev_data = get_dev_data(&pdev->dev);
+
+		if (!dev_data->passthrough)
+			pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
+		else
+			pdev->dev.archdata.dma_ops = &nommu_dma_ops;
 	}
 
 	return unhandled;
@@ -2610,6 +2999,20 @@ out_err:
2610 return NULL; 2999 return NULL;
2611} 3000}
2612 3001
3002static int __init alloc_passthrough_domain(void)
3003{
3004 if (pt_domain != NULL)
3005 return 0;
3006
3007 /* allocate passthrough domain */
3008 pt_domain = protection_domain_alloc();
3009 if (!pt_domain)
3010 return -ENOMEM;
3011
3012 pt_domain->mode = PAGE_MODE_NONE;
3013
3014 return 0;
3015}
2613static int amd_iommu_domain_init(struct iommu_domain *dom) 3016static int amd_iommu_domain_init(struct iommu_domain *dom)
2614{ 3017{
2615 struct protection_domain *domain; 3018 struct protection_domain *domain;
@@ -2623,6 +3026,8 @@ static int amd_iommu_domain_init(struct iommu_domain *dom)
2623 if (!domain->pt_root) 3026 if (!domain->pt_root)
2624 goto out_free; 3027 goto out_free;
2625 3028
3029 domain->iommu_domain = dom;
3030
2626 dom->priv = domain; 3031 dom->priv = domain;
2627 3032
2628 return 0; 3033 return 0;
@@ -2645,7 +3050,11 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
 
 	BUG_ON(domain->dev_cnt != 0);
 
-	free_pagetable(domain);
+	if (domain->mode != PAGE_MODE_NONE)
+		free_pagetable(domain);
+
+	if (domain->flags & PD_IOMMUV2_MASK)
+		free_gcr3_table(domain);
 
 	protection_domain_free(domain);
 
@@ -2702,13 +3111,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 }
 
 static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
-			 phys_addr_t paddr, int gfp_order, int iommu_prot)
+			 phys_addr_t paddr, size_t page_size, int iommu_prot)
 {
-	unsigned long page_size = 0x1000UL << gfp_order;
 	struct protection_domain *domain = dom->priv;
 	int prot = 0;
 	int ret;
 
+	if (domain->mode == PAGE_MODE_NONE)
+		return -EINVAL;
+
 	if (iommu_prot & IOMMU_READ)
 		prot |= IOMMU_PROT_IR;
 	if (iommu_prot & IOMMU_WRITE)
@@ -2721,13 +3132,14 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
 	return ret;
 }
 
-static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
-			   int gfp_order)
+static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
+			      size_t page_size)
 {
 	struct protection_domain *domain = dom->priv;
-	unsigned long page_size, unmap_size;
+	size_t unmap_size;
 
-	page_size = 0x1000UL << gfp_order;
+	if (domain->mode == PAGE_MODE_NONE)
+		return -EINVAL;
 
 	mutex_lock(&domain->api_lock);
 	unmap_size = iommu_unmap_page(domain, iova, page_size);
@@ -2735,7 +3147,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
 
 	domain_flush_tlb_pde(domain);
 
-	return get_order(unmap_size);
+	return unmap_size;
 }
 
 static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
@@ -2746,6 +3158,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
2746 phys_addr_t paddr; 3158 phys_addr_t paddr;
2747 u64 *pte, __pte; 3159 u64 *pte, __pte;
2748 3160
3161 if (domain->mode == PAGE_MODE_NONE)
3162 return iova;
3163
2749 pte = fetch_pte(domain, iova); 3164 pte = fetch_pte(domain, iova);
2750 3165
2751 if (!pte || !IOMMU_PTE_PRESENT(*pte)) 3166 if (!pte || !IOMMU_PTE_PRESENT(*pte))
@@ -2773,6 +3188,26 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
2773 return 0; 3188 return 0;
2774} 3189}
2775 3190
3191static int amd_iommu_device_group(struct device *dev, unsigned int *groupid)
3192{
3193 struct iommu_dev_data *dev_data = dev->archdata.iommu;
3194 struct pci_dev *pdev = to_pci_dev(dev);
3195 u16 devid;
3196
3197 if (!dev_data)
3198 return -ENODEV;
3199
3200 if (pdev->is_virtfn || !iommu_group_mf)
3201 devid = dev_data->devid;
3202 else
3203 devid = calc_devid(pdev->bus->number,
3204 PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
3205
3206 *groupid = amd_iommu_alias_table[devid];
3207
3208 return 0;
3209}
3210
2776static struct iommu_ops amd_iommu_ops = { 3211static struct iommu_ops amd_iommu_ops = {
2777 .domain_init = amd_iommu_domain_init, 3212 .domain_init = amd_iommu_domain_init,
2778 .domain_destroy = amd_iommu_domain_destroy, 3213 .domain_destroy = amd_iommu_domain_destroy,
@@ -2782,6 +3217,8 @@ static struct iommu_ops amd_iommu_ops = {
2782 .unmap = amd_iommu_unmap, 3217 .unmap = amd_iommu_unmap,
2783 .iova_to_phys = amd_iommu_iova_to_phys, 3218 .iova_to_phys = amd_iommu_iova_to_phys,
2784 .domain_has_cap = amd_iommu_domain_has_cap, 3219 .domain_has_cap = amd_iommu_domain_has_cap,
3220 .device_group = amd_iommu_device_group,
3221 .pgsize_bitmap = AMD_IOMMU_PGSIZES,
2785}; 3222};
2786 3223
2787/***************************************************************************** 3224/*****************************************************************************
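
amd_iommu_device_group() above maps all functions of a multi-function device onto function 0 of the same slot before consulting the alias table, so they land in one IOMMU group. The devfn arithmetic is easy to see in isolation; the sketch below redefines PCI_SLOT/PCI_DEVFN locally and mirrors calc_devid as CALC_DEVID so it builds in user space (illustrative only, made-up bus/devfn):

    #include <stdint.h>
    #include <stdio.h>

    #define PCI_SLOT(devfn)		(((devfn) >> 3) & 0x1f)
    #define PCI_DEVFN(slot, func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))
    #define CALC_DEVID(bus, devfn)	(((uint16_t)(bus) << 8) | (devfn))

    int main(void)
    {
    	uint8_t bus = 0x02, devfn = 0x3a;	/* 02:07.2 -> slot 7, function 2 */

    	uint16_t devid   = CALC_DEVID(bus, devfn);
    	uint16_t groupid = CALC_DEVID(bus, PCI_DEVFN(PCI_SLOT(devfn), 0));

    	printf("devid=0x%04x group devid=0x%04x\n", devid, groupid);
    	/* -> devid=0x023a group devid=0x0238: functions 0-7 share a group */
    	return 0;
    }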
@@ -2796,21 +3233,23 @@ static struct iommu_ops amd_iommu_ops = {
 
 int __init amd_iommu_init_passthrough(void)
 {
-	struct amd_iommu *iommu;
+	struct iommu_dev_data *dev_data;
 	struct pci_dev *dev = NULL;
+	struct amd_iommu *iommu;
 	u16 devid;
+	int ret;
 
-	/* allocate passthrough domain */
-	pt_domain = protection_domain_alloc();
-	if (!pt_domain)
-		return -ENOMEM;
-
-	pt_domain->mode |= PAGE_MODE_NONE;
+	ret = alloc_passthrough_domain();
+	if (ret)
+		return ret;
 
 	for_each_pci_dev(dev) {
 		if (!check_device(&dev->dev))
 			continue;
 
+		dev_data = get_dev_data(&dev->dev);
+		dev_data->passthrough = true;
+
 		devid = get_device_id(&dev->dev);
 
 		iommu = amd_iommu_rlookup_table[devid];
@@ -2820,7 +3259,375 @@ int __init amd_iommu_init_passthrough(void)
2820 attach_device(&dev->dev, pt_domain); 3259 attach_device(&dev->dev, pt_domain);
2821 } 3260 }
2822 3261
3262 amd_iommu_stats_init();
3263
2823 pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); 3264 pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
2824 3265
2825 return 0; 3266 return 0;
2826} 3267}
3268
3269/* IOMMUv2 specific functions */
3270int amd_iommu_register_ppr_notifier(struct notifier_block *nb)
3271{
3272 return atomic_notifier_chain_register(&ppr_notifier, nb);
3273}
3274EXPORT_SYMBOL(amd_iommu_register_ppr_notifier);
3275
3276int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb)
3277{
3278 return atomic_notifier_chain_unregister(&ppr_notifier, nb);
3279}
3280EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
3281
3282void amd_iommu_domain_direct_map(struct iommu_domain *dom)
3283{
3284 struct protection_domain *domain = dom->priv;
3285 unsigned long flags;
3286
3287 spin_lock_irqsave(&domain->lock, flags);
3288
3289 /* Update data structure */
3290 domain->mode = PAGE_MODE_NONE;
3291 domain->updated = true;
3292
3293 /* Make changes visible to IOMMUs */
3294 update_domain(domain);
3295
3296 /* Page-table is not visible to IOMMU anymore, so free it */
3297 free_pagetable(domain);
3298
3299 spin_unlock_irqrestore(&domain->lock, flags);
3300}
3301EXPORT_SYMBOL(amd_iommu_domain_direct_map);
3302
3303int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
3304{
3305 struct protection_domain *domain = dom->priv;
3306 unsigned long flags;
3307 int levels, ret;
3308
3309 if (pasids <= 0 || pasids > (PASID_MASK + 1))
3310 return -EINVAL;
3311
3312 /* Number of GCR3 table levels required */
3313 for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9)
3314 levels += 1;
3315
3316 if (levels > amd_iommu_max_glx_val)
3317 return -EINVAL;
3318
3319 spin_lock_irqsave(&domain->lock, flags);
3320
3321 /*
3322 * Save us all sanity checks whether devices already in the
3323 * domain support IOMMUv2. Just force that the domain has no
3324 * devices attached when it is switched into IOMMUv2 mode.
3325 */
3326 ret = -EBUSY;
3327 if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK)
3328 goto out;
3329
3330 ret = -ENOMEM;
3331 domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
3332 if (domain->gcr3_tbl == NULL)
3333 goto out;
3334
3335 domain->glx = levels;
3336 domain->flags |= PD_IOMMUV2_MASK;
3337 domain->updated = true;
3338
3339 update_domain(domain);
3340
3341 ret = 0;
3342
3343out:
3344 spin_unlock_irqrestore(&domain->lock, flags);
3345
3346 return ret;
3347}
3348EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
3349
3350static int __flush_pasid(struct protection_domain *domain, int pasid,
3351 u64 address, bool size)
3352{
3353 struct iommu_dev_data *dev_data;
3354 struct iommu_cmd cmd;
3355 int i, ret;
3356
3357 if (!(domain->flags & PD_IOMMUV2_MASK))
3358 return -EINVAL;
3359
3360 build_inv_iommu_pasid(&cmd, domain->id, pasid, address, size);
3361
3362 /*
3363 * IOMMU TLB needs to be flushed before Device TLB to
3364 * prevent device TLB refill from IOMMU TLB
3365 */
3366 for (i = 0; i < amd_iommus_present; ++i) {
3367 if (domain->dev_iommu[i] == 0)
3368 continue;
3369
3370 ret = iommu_queue_command(amd_iommus[i], &cmd);
3371 if (ret != 0)
3372 goto out;
3373 }
3374
3375 /* Wait until IOMMU TLB flushes are complete */
3376 domain_flush_complete(domain);
3377
3378 /* Now flush device TLBs */
3379 list_for_each_entry(dev_data, &domain->dev_list, list) {
3380 struct amd_iommu *iommu;
3381 int qdep;
3382
3383 BUG_ON(!dev_data->ats.enabled);
3384
3385 qdep = dev_data->ats.qdep;
3386 iommu = amd_iommu_rlookup_table[dev_data->devid];
3387
3388 build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid,
3389 qdep, address, size);
3390
3391 ret = iommu_queue_command(iommu, &cmd);
3392 if (ret != 0)
3393 goto out;
3394 }
3395
3396 /* Wait until all device TLBs are flushed */
3397 domain_flush_complete(domain);
3398
3399 ret = 0;
3400
3401out:
3402
3403 return ret;
3404}
3405
3406static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid,
3407 u64 address)
3408{
3409 INC_STATS_COUNTER(invalidate_iotlb);
3410
3411 return __flush_pasid(domain, pasid, address, false);
3412}
3413
3414int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
3415 u64 address)
3416{
3417 struct protection_domain *domain = dom->priv;
3418 unsigned long flags;
3419 int ret;
3420
3421 spin_lock_irqsave(&domain->lock, flags);
3422 ret = __amd_iommu_flush_page(domain, pasid, address);
3423 spin_unlock_irqrestore(&domain->lock, flags);
3424
3425 return ret;
3426}
3427EXPORT_SYMBOL(amd_iommu_flush_page);
3428
3429static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid)
3430{
3431 INC_STATS_COUNTER(invalidate_iotlb_all);
3432
3433 return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
3434 true);
3435}
3436
3437int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid)
3438{
3439 struct protection_domain *domain = dom->priv;
3440 unsigned long flags;
3441 int ret;
3442
3443 spin_lock_irqsave(&domain->lock, flags);
3444 ret = __amd_iommu_flush_tlb(domain, pasid);
3445 spin_unlock_irqrestore(&domain->lock, flags);
3446
3447 return ret;
3448}
3449EXPORT_SYMBOL(amd_iommu_flush_tlb);
3450
3451static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
3452{
3453 int index;
3454 u64 *pte;
3455
3456 while (true) {
3457
3458 index = (pasid >> (9 * level)) & 0x1ff;
3459 pte = &root[index];
3460
3461 if (level == 0)
3462 break;
3463
3464 if (!(*pte & GCR3_VALID)) {
3465 if (!alloc)
3466 return NULL;
3467
3468 root = (void *)get_zeroed_page(GFP_ATOMIC);
3469 if (root == NULL)
3470 return NULL;
3471
3472 *pte = __pa(root) | GCR3_VALID;
3473 }
3474
3475 root = __va(*pte & PAGE_MASK);
3476
3477 level -= 1;
3478 }
3479
3480 return pte;
3481}
3482
3483static int __set_gcr3(struct protection_domain *domain, int pasid,
3484 unsigned long cr3)
3485{
3486 u64 *pte;
3487
3488 if (domain->mode != PAGE_MODE_NONE)
3489 return -EINVAL;
3490
3491 pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
3492 if (pte == NULL)
3493 return -ENOMEM;
3494
3495 *pte = (cr3 & PAGE_MASK) | GCR3_VALID;
3496
3497 return __amd_iommu_flush_tlb(domain, pasid);
3498}
3499
3500static int __clear_gcr3(struct protection_domain *domain, int pasid)
3501{
3502 u64 *pte;
3503
3504 if (domain->mode != PAGE_MODE_NONE)
3505 return -EINVAL;
3506
3507 pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
3508 if (pte == NULL)
3509 return 0;
3510
3511 *pte = 0;
3512
3513 return __amd_iommu_flush_tlb(domain, pasid);
3514}
3515
3516int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
3517 unsigned long cr3)
3518{
3519 struct protection_domain *domain = dom->priv;
3520 unsigned long flags;
3521 int ret;
3522
3523 spin_lock_irqsave(&domain->lock, flags);
3524 ret = __set_gcr3(domain, pasid, cr3);
3525 spin_unlock_irqrestore(&domain->lock, flags);
3526
3527 return ret;
3528}
3529EXPORT_SYMBOL(amd_iommu_domain_set_gcr3);
3530
3531int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid)
3532{
3533 struct protection_domain *domain = dom->priv;
3534 unsigned long flags;
3535 int ret;
3536
3537 spin_lock_irqsave(&domain->lock, flags);
3538 ret = __clear_gcr3(domain, pasid);
3539 spin_unlock_irqrestore(&domain->lock, flags);
3540
3541 return ret;
3542}
3543EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
3544
3545int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
3546 int status, int tag)
3547{
3548 struct iommu_dev_data *dev_data;
3549 struct amd_iommu *iommu;
3550 struct iommu_cmd cmd;
3551
3552 INC_STATS_COUNTER(complete_ppr);
3553
3554 dev_data = get_dev_data(&pdev->dev);
3555 iommu = amd_iommu_rlookup_table[dev_data->devid];
3556
3557 build_complete_ppr(&cmd, dev_data->devid, pasid, status,
3558 tag, dev_data->pri_tlp);
3559
3560 return iommu_queue_command(iommu, &cmd);
3561}
3562EXPORT_SYMBOL(amd_iommu_complete_ppr);
3563
3564struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
3565{
3566 struct protection_domain *domain;
3567
3568 domain = get_domain(&pdev->dev);
3569 if (IS_ERR(domain))
3570 return NULL;
3571
3572 /* Only return IOMMUv2 domains */
3573 if (!(domain->flags & PD_IOMMUV2_MASK))
3574 return NULL;
3575
3576 return domain->iommu_domain;
3577}
3578EXPORT_SYMBOL(amd_iommu_get_v2_domain);
3579
3580void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum)
3581{
3582 struct iommu_dev_data *dev_data;
3583
3584 if (!amd_iommu_v2_supported())
3585 return;
3586
3587 dev_data = get_dev_data(&pdev->dev);
3588 dev_data->errata |= (1 << erratum);
3589}
3590EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
3591
3592int amd_iommu_device_info(struct pci_dev *pdev,
3593 struct amd_iommu_device_info *info)
3594{
3595 int max_pasids;
3596 int pos;
3597
3598 if (pdev == NULL || info == NULL)
3599 return -EINVAL;
3600
3601 if (!amd_iommu_v2_supported())
3602 return -EINVAL;
3603
3604 memset(info, 0, sizeof(*info));
3605
3606 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS);
3607 if (pos)
3608 info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
3609
3610 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
3611 if (pos)
3612 info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
3613
3614 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
3615 if (pos) {
3616 int features;
3617
3618 max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1));
3619 max_pasids = min(max_pasids, (1 << 20));
3620
3621 info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
3622 info->max_pasids = min(pci_max_pasids(pdev), max_pasids);
3623
3624 features = pci_pasid_features(pdev);
3625 if (features & PCI_PASID_CAP_EXEC)
3626 info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
3627 if (features & PCI_PASID_CAP_PRIV)
3628 info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
3629 }
3630
3631 return 0;
3632}
3633EXPORT_SYMBOL(amd_iommu_device_info);
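
Two pieces of arithmetic in the new IOMMUv2 code above are worth seeing with numbers: amd_iommu_domain_enable_v2() computes how many GCR3 table levels a PASID limit needs (9 bits, i.e. 512 entries, per level), and __get_gcr3_pte() later indexes each level with the matching 9-bit slice of the PASID. The stand-alone sketch below reproduces both calculations with example values (illustrative only, not driver code):

    #include <stdio.h>

    int main(void)
    {
    	int pasids = 1 << 16;		/* request room for 65536 PASIDs */
    	int levels = 0, p, level;
    	int pasid = 0x9234;		/* one PASID inside that range */

    	/* Number of GCR3 table levels required (same loop as the driver) */
    	for (p = pasids; (p - 1) & ~0x1ff; p >>= 9)
    		levels += 1;

    	printf("pasids=%d -> levels=%d\n", pasids, levels);	/* -> levels=1 */

    	/* Walk indices from the top level down, as __get_gcr3_pte() does */
    	for (level = levels; level >= 0; level--)
    		printf("level %d index = 0x%03x\n",
    		       level, (pasid >> (9 * level)) & 0x1ff);
    	/* -> level 1 index = 0x049, level 0 index = 0x034 */
    	return 0;
    }

With levels = 1 the domain gets a two-level table (512 * 512 = 262144 slots), which comfortably covers 2^16 PASIDs; domain->glx stores that level count for the DTE encoding shown earlier.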
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 82d2410f420..bdea288dc18 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -25,6 +25,7 @@
25#include <linux/interrupt.h> 25#include <linux/interrupt.h>
26#include <linux/msi.h> 26#include <linux/msi.h>
27#include <linux/amd-iommu.h> 27#include <linux/amd-iommu.h>
28#include <linux/export.h>
28#include <asm/pci-direct.h> 29#include <asm/pci-direct.h>
29#include <asm/iommu.h> 30#include <asm/iommu.h>
30#include <asm/gart.h> 31#include <asm/gart.h>
@@ -141,6 +142,12 @@ int amd_iommus_present;
141bool amd_iommu_np_cache __read_mostly; 142bool amd_iommu_np_cache __read_mostly;
142bool amd_iommu_iotlb_sup __read_mostly = true; 143bool amd_iommu_iotlb_sup __read_mostly = true;
143 144
145u32 amd_iommu_max_pasids __read_mostly = ~0;
146
147bool amd_iommu_v2_present __read_mostly;
148
149bool amd_iommu_force_isolation __read_mostly;
150
144/* 151/*
145 * The ACPI table parsing functions set this variable on an error 152 * The ACPI table parsing functions set this variable on an error
146 */ 153 */
@@ -299,6 +306,16 @@ static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
299 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 306 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
300} 307}
301 308
309static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
310{
311 u32 ctrl;
312
313 ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
314 ctrl &= ~CTRL_INV_TO_MASK;
315 ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
316 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
317}
318
302/* Function to enable the hardware */ 319/* Function to enable the hardware */
303static void iommu_enable(struct amd_iommu *iommu) 320static void iommu_enable(struct amd_iommu *iommu)
304{ 321{
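
iommu_set_inv_tlb_timeout() above is a read-modify-write of a 3-bit field at bit position CONTROL_INV_TIMEOUT (5) of the control register; CTRL_INV_TO_1S is the encoded value 4. A worked example with a made-up register value (illustrative, reusing the constants this merge adds to amd_iommu_types.h):

    #include <stdint.h>
    #include <stdio.h>

    #define CONTROL_INV_TIMEOUT	0x05
    #define CTRL_INV_TO_MASK	(7 << CONTROL_INV_TIMEOUT)
    #define CTRL_INV_TO_1S		4

    int main(void)
    {
    	uint32_t ctrl = 0x2000005f;		/* pretend register contents */

    	ctrl &= ~CTRL_INV_TO_MASK;		/* clear bits 5-7 */
    	ctrl |= (CTRL_INV_TO_1S << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;

    	printf("ctrl = 0x%08x\n", ctrl);	/* -> 0x2000009f */
    	return 0;
    }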
@@ -581,21 +598,69 @@ static void __init free_event_buffer(struct amd_iommu *iommu)
581 free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); 598 free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
582} 599}
583 600
601/* allocates the memory where the IOMMU will log its events to */
602static u8 * __init alloc_ppr_log(struct amd_iommu *iommu)
603{
604 iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
605 get_order(PPR_LOG_SIZE));
606
607 if (iommu->ppr_log == NULL)
608 return NULL;
609
610 return iommu->ppr_log;
611}
612
613static void iommu_enable_ppr_log(struct amd_iommu *iommu)
614{
615 u64 entry;
616
617 if (iommu->ppr_log == NULL)
618 return;
619
620 entry = (u64)virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
621
622 memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
623 &entry, sizeof(entry));
624
625 /* set head and tail to zero manually */
626 writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
627 writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
628
629 iommu_feature_enable(iommu, CONTROL_PPFLOG_EN);
630 iommu_feature_enable(iommu, CONTROL_PPR_EN);
631}
632
633static void __init free_ppr_log(struct amd_iommu *iommu)
634{
635 if (iommu->ppr_log == NULL)
636 return;
637
638 free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
639}
640
641static void iommu_enable_gt(struct amd_iommu *iommu)
642{
643 if (!iommu_feature(iommu, FEATURE_GT))
644 return;
645
646 iommu_feature_enable(iommu, CONTROL_GT_EN);
647}
648
 /* sets a specific bit in the device table entry. */
 static void set_dev_entry_bit(u16 devid, u8 bit)
 {
-	int i = (bit >> 5) & 0x07;
-	int _bit = bit & 0x1f;
+	int i = (bit >> 6) & 0x03;
+	int _bit = bit & 0x3f;
 
-	amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
+	amd_iommu_dev_table[devid].data[i] |= (1UL << _bit);
 }
 
 static int get_dev_entry_bit(u16 devid, u8 bit)
 {
-	int i = (bit >> 5) & 0x07;
-	int _bit = bit & 0x1f;
+	int i = (bit >> 6) & 0x03;
+	int _bit = bit & 0x3f;
 
-	return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit;
+	return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
 }
 
 
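
The set_dev_entry_bit()/get_dev_entry_bit() change above follows the device table entry switching from eight u32 words to four u64 words: the word index is now bit >> 6 and the in-word position bit & 0x3f. A quick illustration for an arbitrary example bit (bit 97 here is not a real DTE flag, just a number):

    #include <stdio.h>

    int main(void)
    {
    	unsigned int bit  = 97;			/* hypothetical DTE bit number */
    	unsigned int i    = (bit >> 6) & 0x03;	/* -> u64 word 1 */
    	unsigned int _bit = bit & 0x3f;		/* -> bit 33 inside that word */

    	printf("u64 word %u, bit %u (mask 0x%016llx)\n", i, _bit, 1ULL << _bit);
    	/* the old u32 scheme would have addressed word 3, bit 1 instead */
    	return 0;
    }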
@@ -699,6 +764,32 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
699 764
700 iommu->features = ((u64)high << 32) | low; 765 iommu->features = ((u64)high << 32) | low;
701 766
767 if (iommu_feature(iommu, FEATURE_GT)) {
768 int glxval;
769 u32 pasids;
770 u64 shift;
771
772 shift = iommu->features & FEATURE_PASID_MASK;
773 shift >>= FEATURE_PASID_SHIFT;
774 pasids = (1 << shift);
775
776 amd_iommu_max_pasids = min(amd_iommu_max_pasids, pasids);
777
778 glxval = iommu->features & FEATURE_GLXVAL_MASK;
779 glxval >>= FEATURE_GLXVAL_SHIFT;
780
781 if (amd_iommu_max_glx_val == -1)
782 amd_iommu_max_glx_val = glxval;
783 else
784 amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
785 }
786
787 if (iommu_feature(iommu, FEATURE_GT) &&
788 iommu_feature(iommu, FEATURE_PPR)) {
789 iommu->is_iommu_v2 = true;
790 amd_iommu_v2_present = true;
791 }
792
702 if (!is_rd890_iommu(iommu->dev)) 793 if (!is_rd890_iommu(iommu->dev))
703 return; 794 return;
704 795
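
init_iommu_from_pci() above extracts two fields from the 64-bit extended-feature register: a 5-bit PASID width at bit 32 (the driver supports 2^width PASIDs) and a 2-bit GLX value at bit 14. A worked example with a made-up feature value (illustrative only, reusing the FEATURE_* constants this merge adds):

    #include <stdint.h>
    #include <stdio.h>

    #define FEATURE_PASID_SHIFT	32
    #define FEATURE_PASID_MASK	(0x1fULL << FEATURE_PASID_SHIFT)
    #define FEATURE_GLXVAL_SHIFT	14
    #define FEATURE_GLXVAL_MASK	(0x03ULL << FEATURE_GLXVAL_SHIFT)

    int main(void)
    {
    	uint64_t features = (0x10ULL << FEATURE_PASID_SHIFT) |	/* width 16 */
    			    (0x01ULL << FEATURE_GLXVAL_SHIFT);	/* glxval 1 */

    	uint32_t pasids = 1u << ((features & FEATURE_PASID_MASK) >> FEATURE_PASID_SHIFT);
    	int glxval = (features & FEATURE_GLXVAL_MASK) >> FEATURE_GLXVAL_SHIFT;

    	printf("max pasids = %u, glxval = %d\n", pasids, glxval);
    	/* -> max pasids = 65536, glxval = 1 */
    	return 0;
    }

The driver then takes the minimum of these values across all IOMMUs, which is why amd_iommu_max_pasids and amd_iommu_max_glx_val start out as upper bounds and shrink as units are probed.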
@@ -901,6 +992,7 @@ static void __init free_iommu_one(struct amd_iommu *iommu)
901{ 992{
902 free_command_buffer(iommu); 993 free_command_buffer(iommu);
903 free_event_buffer(iommu); 994 free_event_buffer(iommu);
995 free_ppr_log(iommu);
904 iommu_unmap_mmio_space(iommu); 996 iommu_unmap_mmio_space(iommu);
905} 997}
906 998
@@ -964,6 +1056,12 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
964 init_iommu_from_acpi(iommu, h); 1056 init_iommu_from_acpi(iommu, h);
965 init_iommu_devices(iommu); 1057 init_iommu_devices(iommu);
966 1058
1059 if (iommu_feature(iommu, FEATURE_PPR)) {
1060 iommu->ppr_log = alloc_ppr_log(iommu);
1061 if (!iommu->ppr_log)
1062 return -ENOMEM;
1063 }
1064
967 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) 1065 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
968 amd_iommu_np_cache = true; 1066 amd_iommu_np_cache = true;
969 1067
@@ -1050,6 +1148,9 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
1050 iommu->int_enabled = true; 1148 iommu->int_enabled = true;
1051 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); 1149 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
1052 1150
1151 if (iommu->ppr_log != NULL)
1152 iommu_feature_enable(iommu, CONTROL_PPFINT_EN);
1153
1053 return 0; 1154 return 0;
1054} 1155}
1055 1156
@@ -1209,6 +1310,9 @@ static void iommu_init_flags(struct amd_iommu *iommu)
1209 * make IOMMU memory accesses cache coherent 1310 * make IOMMU memory accesses cache coherent
1210 */ 1311 */
1211 iommu_feature_enable(iommu, CONTROL_COHERENT_EN); 1312 iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
1313
1314 /* Set IOTLB invalidation timeout to 1s */
1315 iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
1212} 1316}
1213 1317
1214static void iommu_apply_resume_quirks(struct amd_iommu *iommu) 1318static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
@@ -1274,6 +1378,8 @@ static void enable_iommus(void)
1274 iommu_set_device_table(iommu); 1378 iommu_set_device_table(iommu);
1275 iommu_enable_command_buffer(iommu); 1379 iommu_enable_command_buffer(iommu);
1276 iommu_enable_event_buffer(iommu); 1380 iommu_enable_event_buffer(iommu);
1381 iommu_enable_ppr_log(iommu);
1382 iommu_enable_gt(iommu);
1277 iommu_set_exclusion_range(iommu); 1383 iommu_set_exclusion_range(iommu);
1278 iommu_init_msi(iommu); 1384 iommu_init_msi(iommu);
1279 iommu_enable(iommu); 1385 iommu_enable(iommu);
@@ -1303,13 +1409,6 @@ static void amd_iommu_resume(void)
 
 	/* re-load the hardware */
 	enable_iommus();
-
-	/*
-	 * we have to flush after the IOMMUs are enabled because a
-	 * disabled IOMMU will never execute the commands we send
-	 */
-	for_each_iommu(iommu)
-		iommu_flush_all_caches(iommu);
 }
 
 static int amd_iommu_suspend(void)
@@ -1560,6 +1659,8 @@ static int __init parse_amd_iommu_options(char *str)
1560 amd_iommu_unmap_flush = true; 1659 amd_iommu_unmap_flush = true;
1561 if (strncmp(str, "off", 3) == 0) 1660 if (strncmp(str, "off", 3) == 0)
1562 amd_iommu_disabled = true; 1661 amd_iommu_disabled = true;
1662 if (strncmp(str, "force_isolation", 15) == 0)
1663 amd_iommu_force_isolation = true;
1563 } 1664 }
1564 1665
1565 return 1; 1666 return 1;
@@ -1572,3 +1673,9 @@ IOMMU_INIT_FINISH(amd_iommu_detect,
1572 gart_iommu_hole_init, 1673 gart_iommu_hole_init,
1573 0, 1674 0,
1574 0); 1675 0);
1676
1677bool amd_iommu_v2_supported(void)
1678{
1679 return amd_iommu_v2_present;
1680}
1681EXPORT_SYMBOL(amd_iommu_v2_supported);
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 7ffaa64410b..1a7f41c6cc6 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -31,6 +31,30 @@ extern int amd_iommu_init_devices(void);
31extern void amd_iommu_uninit_devices(void); 31extern void amd_iommu_uninit_devices(void);
32extern void amd_iommu_init_notifier(void); 32extern void amd_iommu_init_notifier(void);
33extern void amd_iommu_init_api(void); 33extern void amd_iommu_init_api(void);
34
35/* IOMMUv2 specific functions */
36struct iommu_domain;
37
38extern bool amd_iommu_v2_supported(void);
39extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
40extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
41extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
42extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
43extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
44 u64 address);
45extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
46extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
47 unsigned long cr3);
48extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
49extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
50
51#define PPR_SUCCESS 0x0
52#define PPR_INVALID 0x1
53#define PPR_FAILURE 0xf
54
55extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
56 int status, int tag);
57
34#ifndef CONFIG_AMD_IOMMU_STATS 58#ifndef CONFIG_AMD_IOMMU_STATS
35 59
36static inline void amd_iommu_stats_init(void) { } 60static inline void amd_iommu_stats_init(void) { }
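
For orientation, the prototypes added above are the driver-internal copies of the IOMMUv2 interface that the new amd_iommu_v2 module consumes. A hypothetical in-kernel usage sketch follows; it is not part of the patch, it trims all error handling, it simplifies the cr3 handling to __pa(mm->pgd), and it assumes the same functions are exported through <linux/amd-iommu.h>:

    #include <linux/iommu.h>
    #include <linux/pci.h>
    #include <linux/mm.h>
    #include <linux/amd-iommu.h>

    static int example_bind_pasid(struct pci_dev *pdev, struct mm_struct *mm, int pasid)
    {
    	struct iommu_domain *dom;

    	if (!amd_iommu_v2_supported())
    		return -ENODEV;

    	dom = iommu_domain_alloc(&pci_bus_type);
    	if (!dom)
    		return -ENOMEM;

    	amd_iommu_domain_direct_map(dom);		/* drop the host page table */
    	amd_iommu_domain_enable_v2(dom, 1 << 16);	/* room for 65536 PASIDs */

    	iommu_attach_device(dom, &pdev->dev);

    	/* point the PASID at the process page table and flush its TLB entries */
    	amd_iommu_domain_set_gcr3(dom, pasid, __pa(mm->pgd));
    	amd_iommu_flush_tlb(dom, pasid);

    	return 0;
    }

A PPR consumer would additionally register a notifier with amd_iommu_register_ppr_notifier() and answer faults via amd_iommu_complete_ppr() with PPR_SUCCESS, PPR_INVALID or PPR_FAILURE.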
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 5b9c5075e81..2452f3b7173 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -69,11 +69,14 @@
69#define MMIO_EXCL_BASE_OFFSET 0x0020 69#define MMIO_EXCL_BASE_OFFSET 0x0020
70#define MMIO_EXCL_LIMIT_OFFSET 0x0028 70#define MMIO_EXCL_LIMIT_OFFSET 0x0028
71#define MMIO_EXT_FEATURES 0x0030 71#define MMIO_EXT_FEATURES 0x0030
72#define MMIO_PPR_LOG_OFFSET 0x0038
72#define MMIO_CMD_HEAD_OFFSET 0x2000 73#define MMIO_CMD_HEAD_OFFSET 0x2000
73#define MMIO_CMD_TAIL_OFFSET 0x2008 74#define MMIO_CMD_TAIL_OFFSET 0x2008
74#define MMIO_EVT_HEAD_OFFSET 0x2010 75#define MMIO_EVT_HEAD_OFFSET 0x2010
75#define MMIO_EVT_TAIL_OFFSET 0x2018 76#define MMIO_EVT_TAIL_OFFSET 0x2018
76#define MMIO_STATUS_OFFSET 0x2020 77#define MMIO_STATUS_OFFSET 0x2020
78#define MMIO_PPR_HEAD_OFFSET 0x2030
79#define MMIO_PPR_TAIL_OFFSET 0x2038
77 80
78 81
79/* Extended Feature Bits */ 82/* Extended Feature Bits */
@@ -87,8 +90,17 @@
87#define FEATURE_HE (1ULL<<8) 90#define FEATURE_HE (1ULL<<8)
88#define FEATURE_PC (1ULL<<9) 91#define FEATURE_PC (1ULL<<9)
89 92
93#define FEATURE_PASID_SHIFT 32
94#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT)
95
96#define FEATURE_GLXVAL_SHIFT 14
97#define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT)
98
99#define PASID_MASK 0x000fffff
100
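Illustration (not part of the patch): the two FEATURE_* field masks above carve a 5-bit PASID-capability field and a 2-bit GLXVAL field out of the 64-bit extended-feature register; the init code uses them to derive amd_iommu_max_pasids and amd_iommu_max_glx_val declared later in this header. A minimal standalone sketch of the field extraction, with the defines restated and a made-up register value:

/* Standalone sketch: decode the PASID and GLXVAL fields from a raw
 * extended-feature register value; the example value is invented. */
#include <stdio.h>
#include <stdint.h>

#define FEATURE_PASID_SHIFT   32
#define FEATURE_PASID_MASK    (0x1fULL << FEATURE_PASID_SHIFT)
#define FEATURE_GLXVAL_SHIFT  14
#define FEATURE_GLXVAL_MASK   (0x03ULL << FEATURE_GLXVAL_SHIFT)

int main(void)
{
	uint64_t features = 0x0000001200004007ULL;  /* made-up example value */

	unsigned int pasid_field  = (features & FEATURE_PASID_MASK) >> FEATURE_PASID_SHIFT;
	unsigned int glxval_field = (features & FEATURE_GLXVAL_MASK) >> FEATURE_GLXVAL_SHIFT;

	printf("PASID field: %u, GLXVAL field: %u\n", pasid_field, glxval_field);
	return 0;
}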
90/* MMIO status bits */ 101/* MMIO status bits */
91#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 102#define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2)
103#define MMIO_STATUS_PPR_INT_MASK (1 << 6)
92 104
93/* event logging constants */ 105/* event logging constants */
94#define EVENT_ENTRY_SIZE 0x10 106#define EVENT_ENTRY_SIZE 0x10
@@ -115,6 +127,7 @@
115#define CONTROL_EVT_LOG_EN 0x02ULL 127#define CONTROL_EVT_LOG_EN 0x02ULL
116#define CONTROL_EVT_INT_EN 0x03ULL 128#define CONTROL_EVT_INT_EN 0x03ULL
117#define CONTROL_COMWAIT_EN 0x04ULL 129#define CONTROL_COMWAIT_EN 0x04ULL
130#define CONTROL_INV_TIMEOUT 0x05ULL
118#define CONTROL_PASSPW_EN 0x08ULL 131#define CONTROL_PASSPW_EN 0x08ULL
119#define CONTROL_RESPASSPW_EN 0x09ULL 132#define CONTROL_RESPASSPW_EN 0x09ULL
120#define CONTROL_COHERENT_EN 0x0aULL 133#define CONTROL_COHERENT_EN 0x0aULL
@@ -122,18 +135,34 @@
122#define CONTROL_CMDBUF_EN 0x0cULL 135#define CONTROL_CMDBUF_EN 0x0cULL
123#define CONTROL_PPFLOG_EN 0x0dULL 136#define CONTROL_PPFLOG_EN 0x0dULL
124#define CONTROL_PPFINT_EN 0x0eULL 137#define CONTROL_PPFINT_EN 0x0eULL
138#define CONTROL_PPR_EN 0x0fULL
139#define CONTROL_GT_EN 0x10ULL
140
141#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT)
142#define CTRL_INV_TO_NONE 0
143#define CTRL_INV_TO_1MS 1
144#define CTRL_INV_TO_10MS 2
145#define CTRL_INV_TO_100MS 3
146#define CTRL_INV_TO_1S 4
147#define CTRL_INV_TO_10S 5
148#define CTRL_INV_TO_100S 6
125 149
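Illustration (not part of the patch): CONTROL_INV_TIMEOUT names the bit position of a 3-bit invalidation-timeout field in the control register, and the CTRL_INV_TO_* values are the encodings that go into it. A standalone sketch of the bit manipulation these defines imply (the actual register read/write helpers in amd_iommu_init.c are omitted); the starting register image is made up:

/* Standalone sketch: fold a timeout selection into a control-register
 * image using the encoding defined above. */
#include <stdio.h>
#include <stdint.h>

#define CONTROL_INV_TIMEOUT  0x05ULL
#define CTRL_INV_TO_MASK     (7 << CONTROL_INV_TIMEOUT)
#define CTRL_INV_TO_1S       4

int main(void)
{
	uint64_t ctrl = 0;  /* pretend this was read from the MMIO control register */

	ctrl &= ~(uint64_t)CTRL_INV_TO_MASK;                     /* clear bits 5-7 */
	ctrl |= ((uint64_t)CTRL_INV_TO_1S << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;

	printf("control image: 0x%llx\n", (unsigned long long)ctrl);  /* 0x80 */
	return 0;
}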
126/* command specific defines */ 150/* command specific defines */
127#define CMD_COMPL_WAIT 0x01 151#define CMD_COMPL_WAIT 0x01
128#define CMD_INV_DEV_ENTRY 0x02 152#define CMD_INV_DEV_ENTRY 0x02
129#define CMD_INV_IOMMU_PAGES 0x03 153#define CMD_INV_IOMMU_PAGES 0x03
130#define CMD_INV_IOTLB_PAGES 0x04 154#define CMD_INV_IOTLB_PAGES 0x04
155#define CMD_COMPLETE_PPR 0x07
131#define CMD_INV_ALL 0x08 156#define CMD_INV_ALL 0x08
132 157
133#define CMD_COMPL_WAIT_STORE_MASK 0x01 158#define CMD_COMPL_WAIT_STORE_MASK 0x01
134#define CMD_COMPL_WAIT_INT_MASK 0x02 159#define CMD_COMPL_WAIT_INT_MASK 0x02
135#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01 160#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01
136#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02 161#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02
162#define CMD_INV_IOMMU_PAGES_GN_MASK 0x04
163
164#define PPR_STATUS_MASK 0xf
165#define PPR_STATUS_SHIFT 12
137 166
138#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL 167#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL
139 168
@@ -165,6 +194,23 @@
165#define EVT_BUFFER_SIZE 8192 /* 512 entries */ 194#define EVT_BUFFER_SIZE 8192 /* 512 entries */
166#define EVT_LEN_MASK (0x9ULL << 56) 195#define EVT_LEN_MASK (0x9ULL << 56)
167 196
197/* Constants for PPR Log handling */
198#define PPR_LOG_ENTRIES 512
199#define PPR_LOG_SIZE_SHIFT 56
200#define PPR_LOG_SIZE_512 (0x9ULL << PPR_LOG_SIZE_SHIFT)
201#define PPR_ENTRY_SIZE 16
202#define PPR_LOG_SIZE (PPR_ENTRY_SIZE * PPR_LOG_ENTRIES)
203
204#define PPR_REQ_TYPE(x) (((x) >> 60) & 0xfULL)
205#define PPR_FLAGS(x) (((x) >> 48) & 0xfffULL)
206#define PPR_DEVID(x) ((x) & 0xffffULL)
207#define PPR_TAG(x) (((x) >> 32) & 0x3ffULL)
208#define PPR_PASID1(x) (((x) >> 16) & 0xffffULL)
209#define PPR_PASID2(x) (((x) >> 42) & 0xfULL)
210#define PPR_PASID(x) ((PPR_PASID2(x) << 16) | PPR_PASID1(x))
211
212#define PPR_REQ_FAULT 0x01
213
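Illustration (not part of the patch): the macros above pull the device id, tag, flags and PASID out of a raw PPR log entry; note that the PASID arrives split in two pieces (PPR_PASID1/PPR_PASID2) and is stitched back together by PPR_PASID(). The sketch below decodes an invented two-word entry, assuming (as the macros suggest) that the first word carries these fields and the second word the faulting address:

/* Standalone sketch: decode one raw PPR log entry with the macros
 * above, restated here.  The example entry is invented. */
#include <stdio.h>
#include <stdint.h>

#define PPR_REQ_TYPE(x)  (((x) >> 60) & 0xfULL)
#define PPR_FLAGS(x)     (((x) >> 48) & 0xfffULL)
#define PPR_DEVID(x)     ((x) & 0xffffULL)
#define PPR_TAG(x)       (((x) >> 32) & 0x3ffULL)
#define PPR_PASID1(x)    (((x) >> 16) & 0xffffULL)
#define PPR_PASID2(x)    (((x) >> 42) & 0xfULL)
#define PPR_PASID(x)     ((PPR_PASID2(x) << 16) | PPR_PASID1(x))
#define PPR_REQ_FAULT    0x01

int main(void)
{
	uint64_t raw[2] = { 0x1020000500017010ULL, 0x00007f2a12345000ULL };

	unsigned long long devid = PPR_DEVID(raw[0]);
	unsigned long long pasid = PPR_PASID(raw[0]);
	unsigned long long tag   = PPR_TAG(raw[0]);
	unsigned long long flags = PPR_FLAGS(raw[0]);

	if (PPR_REQ_TYPE(raw[0]) == PPR_REQ_FAULT)
		printf("devid 0x%04llx pasid 0x%05llx tag 0x%03llx flags 0x%03llx addr 0x%llx\n",
		       devid, pasid, tag, flags, (unsigned long long)raw[1]);
	return 0;
}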
168#define PAGE_MODE_NONE 0x00 214#define PAGE_MODE_NONE 0x00
169#define PAGE_MODE_1_LEVEL 0x01 215#define PAGE_MODE_1_LEVEL 0x01
170#define PAGE_MODE_2_LEVEL 0x02 216#define PAGE_MODE_2_LEVEL 0x02
@@ -230,7 +276,24 @@
230#define IOMMU_PTE_IR (1ULL << 61) 276#define IOMMU_PTE_IR (1ULL << 61)
231#define IOMMU_PTE_IW (1ULL << 62) 277#define IOMMU_PTE_IW (1ULL << 62)
232 278
233#define DTE_FLAG_IOTLB 0x01 279#define DTE_FLAG_IOTLB (0x01UL << 32)
280#define DTE_FLAG_GV (0x01ULL << 55)
281#define DTE_GLX_SHIFT (56)
282#define DTE_GLX_MASK (3)
283
284#define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL)
285#define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL)
286#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0xfffffULL)
287
288#define DTE_GCR3_INDEX_A 0
289#define DTE_GCR3_INDEX_B 1
290#define DTE_GCR3_INDEX_C 1
291
292#define DTE_GCR3_SHIFT_A 58
293#define DTE_GCR3_SHIFT_B 16
294#define DTE_GCR3_SHIFT_C 43
295
296#define GCR3_VALID 0x01ULL
234 297
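Illustration (not part of the patch): a guest CR3 root pointer does not fit into one contiguous device-table field, so DTE_GCR3_VAL_A/B/C slice it into three chunks, and the INDEX/SHIFT defines say which 64-bit word of the entry each chunk lands in and at what offset. A standalone sketch of that packing with the defines restated; the pointer value is made up and the real device-table update lives in amd_iommu.c:

/* Standalone sketch: split a GCR3 root pointer across a 4 x u64
 * device-table entry as the VAL/INDEX/SHIFT macros describe. */
#include <stdio.h>
#include <stdint.h>

#define DTE_GCR3_VAL_A(x)  (((x) >> 12) & 0x00007ULL)
#define DTE_GCR3_VAL_B(x)  (((x) >> 15) & 0x0ffffULL)
#define DTE_GCR3_VAL_C(x)  (((x) >> 31) & 0xfffffULL)
#define DTE_GCR3_INDEX_A   0
#define DTE_GCR3_INDEX_B   1
#define DTE_GCR3_INDEX_C   1
#define DTE_GCR3_SHIFT_A   58
#define DTE_GCR3_SHIFT_B   16
#define DTE_GCR3_SHIFT_C   43

int main(void)
{
	uint64_t dte[4] = { 0, 0, 0, 0 };        /* one device-table entry */
	uint64_t gcr3   = 0x000000012345f000ULL; /* made-up page-aligned root */

	dte[DTE_GCR3_INDEX_A] |= DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
	dte[DTE_GCR3_INDEX_B] |= DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
	dte[DTE_GCR3_INDEX_C] |= DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;

	printf("dte[0]=0x%016llx dte[1]=0x%016llx\n",
	       (unsigned long long)dte[0], (unsigned long long)dte[1]);
	return 0;
}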
235#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) 298#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
236#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) 299#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
@@ -257,6 +320,7 @@
257 domain for an IOMMU */ 320 domain for an IOMMU */
258#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page 321#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page
259 translation */ 322 translation */
323#define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */
260 324
261extern bool amd_iommu_dump; 325extern bool amd_iommu_dump;
262#define DUMP_printk(format, arg...) \ 326#define DUMP_printk(format, arg...) \
@@ -285,6 +349,29 @@ extern bool amd_iommu_iotlb_sup;
285#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) 349#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
286#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) 350#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
287 351
352
353/*
354 * This struct is used to pass information about
355 * incoming PPR faults around.
356 */
357struct amd_iommu_fault {
 358	u64 address;    /* IO virtual address of the fault */
359 u32 pasid; /* Address space identifier */
360 u16 device_id; /* Originating PCI device id */
361 u16 tag; /* PPR tag */
362 u16 flags; /* Fault flags */
363
364};
365
366#define PPR_FAULT_EXEC (1 << 1)
367#define PPR_FAULT_READ (1 << 2)
368#define PPR_FAULT_WRITE (1 << 5)
369#define PPR_FAULT_USER (1 << 6)
370#define PPR_FAULT_RSVD (1 << 7)
371#define PPR_FAULT_GN (1 << 8)
372
373struct iommu_domain;
374
288/* 375/*
289 * This structure contains generic data for IOMMU protection domains 376 * This structure contains generic data for IOMMU protection domains
290 * independent of their use. 377 * independent of their use.
@@ -297,11 +384,15 @@ struct protection_domain {
297 u16 id; /* the domain id written to the device table */ 384 u16 id; /* the domain id written to the device table */
298 int mode; /* paging mode (0-6 levels) */ 385 int mode; /* paging mode (0-6 levels) */
299 u64 *pt_root; /* page table root pointer */ 386 u64 *pt_root; /* page table root pointer */
387 int glx; /* Number of levels for GCR3 table */
388 u64 *gcr3_tbl; /* Guest CR3 table */
300 unsigned long flags; /* flags to find out type of domain */ 389 unsigned long flags; /* flags to find out type of domain */
301 bool updated; /* complete domain flush required */ 390 bool updated; /* complete domain flush required */
302 unsigned dev_cnt; /* devices assigned to this domain */ 391 unsigned dev_cnt; /* devices assigned to this domain */
303 unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ 392 unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
304 void *priv; /* private data */ 393 void *priv; /* private data */
394 struct iommu_domain *iommu_domain; /* Pointer to generic
395 domain structure */
305 396
306}; 397};
307 398
@@ -315,10 +406,15 @@ struct iommu_dev_data {
315 struct protection_domain *domain; /* Domain the device is bound to */ 406 struct protection_domain *domain; /* Domain the device is bound to */
 316	atomic_t bind;			  /* Domain attach reference count */ 407	atomic_t bind;			  /* Domain attach reference count */
317 u16 devid; /* PCI Device ID */ 408 u16 devid; /* PCI Device ID */
409 bool iommu_v2; /* Device can make use of IOMMUv2 */
410 bool passthrough; /* Default for device is pt_domain */
318 struct { 411 struct {
319 bool enabled; 412 bool enabled;
320 int qdep; 413 int qdep;
321 } ats; /* ATS state */ 414 } ats; /* ATS state */
415 bool pri_tlp; /* PASID TLB required for
416 PPR completions */
417 u32 errata; /* Bitmap for errata to apply */
322}; 418};
323 419
324/* 420/*
@@ -399,6 +495,9 @@ struct amd_iommu {
399 /* Extended features */ 495 /* Extended features */
400 u64 features; 496 u64 features;
401 497
498 /* IOMMUv2 */
499 bool is_iommu_v2;
500
402 /* 501 /*
403 * Capability pointer. There could be more than one IOMMU per PCI 502 * Capability pointer. There could be more than one IOMMU per PCI
404 * device function if there are more than one AMD IOMMU capability 503 * device function if there are more than one AMD IOMMU capability
@@ -431,6 +530,9 @@ struct amd_iommu {
431 /* MSI number for event interrupt */ 530 /* MSI number for event interrupt */
432 u16 evt_msi_num; 531 u16 evt_msi_num;
433 532
533 /* Base of the PPR log, if present */
534 u8 *ppr_log;
535
434 /* true if interrupts for this IOMMU are already enabled */ 536 /* true if interrupts for this IOMMU are already enabled */
435 bool int_enabled; 537 bool int_enabled;
436 538
@@ -484,7 +586,7 @@ extern struct list_head amd_iommu_pd_list;
484 * Structure defining one entry in the device table 586 * Structure defining one entry in the device table
485 */ 587 */
486struct dev_table_entry { 588struct dev_table_entry {
487 u32 data[8]; 589 u64 data[4];
488}; 590};
489 591
490/* 592/*
@@ -549,6 +651,16 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap;
549 */ 651 */
550extern bool amd_iommu_unmap_flush; 652extern bool amd_iommu_unmap_flush;
551 653
654/* Smallest number of PASIDs supported by any IOMMU in the system */
655extern u32 amd_iommu_max_pasids;
656
657extern bool amd_iommu_v2_present;
658
659extern bool amd_iommu_force_isolation;
660
661/* Max levels of glxval supported */
662extern int amd_iommu_max_glx_val;
663
552/* takes bus and device/function and returns the device id 664/* takes bus and device/function and returns the device id
553 * FIXME: should that be in generic PCI code? */ 665 * FIXME: should that be in generic PCI code? */
554static inline u16 calc_devid(u8 bus, u8 devfn) 666static inline u16 calc_devid(u8 bus, u8 devfn)
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
new file mode 100644
index 00000000000..8add9f125d3
--- /dev/null
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -0,0 +1,994 @@
1/*
2 * Copyright (C) 2010-2012 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published
7 * by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include <linux/mmu_notifier.h>
20#include <linux/amd-iommu.h>
21#include <linux/mm_types.h>
22#include <linux/profile.h>
23#include <linux/module.h>
24#include <linux/sched.h>
25#include <linux/iommu.h>
26#include <linux/wait.h>
27#include <linux/pci.h>
28#include <linux/gfp.h>
29
30#include "amd_iommu_types.h"
31#include "amd_iommu_proto.h"
32
33MODULE_LICENSE("GPL v2");
34MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>");
35
36#define MAX_DEVICES 0x10000
37#define PRI_QUEUE_SIZE 512
38
39struct pri_queue {
40 atomic_t inflight;
41 bool finish;
42 int status;
43};
44
45struct pasid_state {
46 struct list_head list; /* For global state-list */
47 atomic_t count; /* Reference count */
48 struct task_struct *task; /* Task bound to this PASID */
49 struct mm_struct *mm; /* mm_struct for the faults */
 50	struct mmu_notifier mn;                 /* mmu_notifier handle */
51 struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */
52 struct device_state *device_state; /* Link to our device_state */
53 int pasid; /* PASID index */
54 spinlock_t lock; /* Protect pri_queues */
55 wait_queue_head_t wq; /* To wait for count == 0 */
56};
57
58struct device_state {
59 atomic_t count;
60 struct pci_dev *pdev;
61 struct pasid_state **states;
62 struct iommu_domain *domain;
63 int pasid_levels;
64 int max_pasids;
65 amd_iommu_invalid_ppr_cb inv_ppr_cb;
66 amd_iommu_invalidate_ctx inv_ctx_cb;
67 spinlock_t lock;
68 wait_queue_head_t wq;
69};
70
71struct fault {
72 struct work_struct work;
73 struct device_state *dev_state;
74 struct pasid_state *state;
75 struct mm_struct *mm;
76 u64 address;
77 u16 devid;
78 u16 pasid;
79 u16 tag;
80 u16 finish;
81 u16 flags;
82};
83
84struct device_state **state_table;
85static spinlock_t state_lock;
86
87/* List and lock for all pasid_states */
88static LIST_HEAD(pasid_state_list);
89static DEFINE_SPINLOCK(ps_lock);
90
91static struct workqueue_struct *iommu_wq;
92
93/*
94 * Empty page table - Used between
95 * mmu_notifier_invalidate_range_start and
96 * mmu_notifier_invalidate_range_end
97 */
98static u64 *empty_page_table;
99
100static void free_pasid_states(struct device_state *dev_state);
101static void unbind_pasid(struct device_state *dev_state, int pasid);
102static int task_exit(struct notifier_block *nb, unsigned long e, void *data);
103
104static u16 device_id(struct pci_dev *pdev)
105{
106 u16 devid;
107
108 devid = pdev->bus->number;
109 devid = (devid << 8) | pdev->devfn;
110
111 return devid;
112}
113
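Illustration (not part of the patch): device_id() above packs the PCI bus number and devfn into the 16-bit id used to index state_table, the same layout calc_devid() in amd_iommu_types.h produces. A standalone sketch with made-up bus/slot/function numbers:

/* Standalone sketch: the 16-bit device id is bus << 8 | devfn, where
 * devfn itself packs slot and function (PCI_DEVFN). */
#include <stdio.h>
#include <stdint.h>

#define PCI_DEVFN(slot, func)  ((((slot) & 0x1f) << 3) | ((func) & 0x07))

int main(void)
{
	uint8_t  bus   = 0x3a;                   /* made-up bus */
	uint8_t  devfn = PCI_DEVFN(2, 1);        /* slot 2, function 1 -> 0x11 */
	uint16_t devid = ((uint16_t)bus << 8) | devfn;

	printf("devid = 0x%04x\n", (unsigned)devid);   /* 0x3a11 */
	return 0;
}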
114static struct device_state *get_device_state(u16 devid)
115{
116 struct device_state *dev_state;
117 unsigned long flags;
118
119 spin_lock_irqsave(&state_lock, flags);
120 dev_state = state_table[devid];
121 if (dev_state != NULL)
122 atomic_inc(&dev_state->count);
123 spin_unlock_irqrestore(&state_lock, flags);
124
125 return dev_state;
126}
127
128static void free_device_state(struct device_state *dev_state)
129{
130 /*
131 * First detach device from domain - No more PRI requests will arrive
132 * from that device after it is unbound from the IOMMUv2 domain.
133 */
134 iommu_detach_device(dev_state->domain, &dev_state->pdev->dev);
135
136 /* Everything is down now, free the IOMMUv2 domain */
137 iommu_domain_free(dev_state->domain);
138
139 /* Finally get rid of the device-state */
140 kfree(dev_state);
141}
142
143static void put_device_state(struct device_state *dev_state)
144{
145 if (atomic_dec_and_test(&dev_state->count))
146 wake_up(&dev_state->wq);
147}
148
149static void put_device_state_wait(struct device_state *dev_state)
150{
151 DEFINE_WAIT(wait);
152
153 prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE);
154 if (!atomic_dec_and_test(&dev_state->count))
155 schedule();
156 finish_wait(&dev_state->wq, &wait);
157
158 free_device_state(dev_state);
159}
160
161static struct notifier_block profile_nb = {
162 .notifier_call = task_exit,
163};
164
165static void link_pasid_state(struct pasid_state *pasid_state)
166{
167 spin_lock(&ps_lock);
168 list_add_tail(&pasid_state->list, &pasid_state_list);
169 spin_unlock(&ps_lock);
170}
171
172static void __unlink_pasid_state(struct pasid_state *pasid_state)
173{
174 list_del(&pasid_state->list);
175}
176
177static void unlink_pasid_state(struct pasid_state *pasid_state)
178{
179 spin_lock(&ps_lock);
180 __unlink_pasid_state(pasid_state);
181 spin_unlock(&ps_lock);
182}
183
184/* Must be called under dev_state->lock */
185static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
186 int pasid, bool alloc)
187{
188 struct pasid_state **root, **ptr;
189 int level, index;
190
191 level = dev_state->pasid_levels;
192 root = dev_state->states;
193
194 while (true) {
195
196 index = (pasid >> (9 * level)) & 0x1ff;
197 ptr = &root[index];
198
199 if (level == 0)
200 break;
201
202 if (*ptr == NULL) {
203 if (!alloc)
204 return NULL;
205
206 *ptr = (void *)get_zeroed_page(GFP_ATOMIC);
207 if (*ptr == NULL)
208 return NULL;
209 }
210
211 root = (struct pasid_state **)*ptr;
212 level -= 1;
213 }
214
215 return ptr;
216}
217
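Illustration (not part of the patch): the per-device pasid_state table is a small radix tree with 512-entry nodes; __get_pasid_state_ptr() above consumes the PASID nine bits per level, and amd_iommu_init_device() further down computes how many levels sit above the leaf for a requested PASID count. A standalone sketch of just that arithmetic:

/* Standalone sketch of the arithmetic only: how many extra 512-entry
 * levels a given PASID count needs, and which index a PASID uses at
 * each level of the walk. */
#include <stdio.h>

static int pasid_levels(int pasids)
{
	int levels = 0;
	int tmp = pasids;

	for (; (tmp - 1) & ~0x1ff; tmp >>= 9)
		levels += 1;

	return levels;
}

int main(void)
{
	int pasid  = 0x1234;                    /* made-up PASID */
	int levels = pasid_levels(0x10000);     /* 65536 PASIDs -> 1 extra level */
	int level;

	for (level = levels; level >= 0; level--)
		printf("level %d index 0x%03x\n", level, (pasid >> (9 * level)) & 0x1ff);

	return 0;
}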
218static int set_pasid_state(struct device_state *dev_state,
219 struct pasid_state *pasid_state,
220 int pasid)
221{
222 struct pasid_state **ptr;
223 unsigned long flags;
224 int ret;
225
226 spin_lock_irqsave(&dev_state->lock, flags);
227 ptr = __get_pasid_state_ptr(dev_state, pasid, true);
228
229 ret = -ENOMEM;
230 if (ptr == NULL)
231 goto out_unlock;
232
233 ret = -ENOMEM;
234 if (*ptr != NULL)
235 goto out_unlock;
236
237 *ptr = pasid_state;
238
239 ret = 0;
240
241out_unlock:
242 spin_unlock_irqrestore(&dev_state->lock, flags);
243
244 return ret;
245}
246
247static void clear_pasid_state(struct device_state *dev_state, int pasid)
248{
249 struct pasid_state **ptr;
250 unsigned long flags;
251
252 spin_lock_irqsave(&dev_state->lock, flags);
253 ptr = __get_pasid_state_ptr(dev_state, pasid, true);
254
255 if (ptr == NULL)
256 goto out_unlock;
257
258 *ptr = NULL;
259
260out_unlock:
261 spin_unlock_irqrestore(&dev_state->lock, flags);
262}
263
264static struct pasid_state *get_pasid_state(struct device_state *dev_state,
265 int pasid)
266{
267 struct pasid_state **ptr, *ret = NULL;
268 unsigned long flags;
269
270 spin_lock_irqsave(&dev_state->lock, flags);
271 ptr = __get_pasid_state_ptr(dev_state, pasid, false);
272
273 if (ptr == NULL)
274 goto out_unlock;
275
276 ret = *ptr;
277 if (ret)
278 atomic_inc(&ret->count);
279
280out_unlock:
281 spin_unlock_irqrestore(&dev_state->lock, flags);
282
283 return ret;
284}
285
286static void free_pasid_state(struct pasid_state *pasid_state)
287{
288 kfree(pasid_state);
289}
290
291static void put_pasid_state(struct pasid_state *pasid_state)
292{
293 if (atomic_dec_and_test(&pasid_state->count)) {
294 put_device_state(pasid_state->device_state);
295 wake_up(&pasid_state->wq);
296 }
297}
298
299static void put_pasid_state_wait(struct pasid_state *pasid_state)
300{
301 DEFINE_WAIT(wait);
302
303 prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE);
304
305 if (atomic_dec_and_test(&pasid_state->count))
306 put_device_state(pasid_state->device_state);
307 else
308 schedule();
309
310 finish_wait(&pasid_state->wq, &wait);
311 mmput(pasid_state->mm);
312 free_pasid_state(pasid_state);
313}
314
315static void __unbind_pasid(struct pasid_state *pasid_state)
316{
317 struct iommu_domain *domain;
318
319 domain = pasid_state->device_state->domain;
320
321 amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);
322 clear_pasid_state(pasid_state->device_state, pasid_state->pasid);
323
324 /* Make sure no more pending faults are in the queue */
325 flush_workqueue(iommu_wq);
326
327 mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
328
329 put_pasid_state(pasid_state); /* Reference taken in bind() function */
330}
331
332static void unbind_pasid(struct device_state *dev_state, int pasid)
333{
334 struct pasid_state *pasid_state;
335
336 pasid_state = get_pasid_state(dev_state, pasid);
337 if (pasid_state == NULL)
338 return;
339
340 unlink_pasid_state(pasid_state);
341 __unbind_pasid(pasid_state);
342 put_pasid_state_wait(pasid_state); /* Reference taken in this function */
343}
344
345static void free_pasid_states_level1(struct pasid_state **tbl)
346{
347 int i;
348
349 for (i = 0; i < 512; ++i) {
350 if (tbl[i] == NULL)
351 continue;
352
353 free_page((unsigned long)tbl[i]);
354 }
355}
356
357static void free_pasid_states_level2(struct pasid_state **tbl)
358{
359 struct pasid_state **ptr;
360 int i;
361
362 for (i = 0; i < 512; ++i) {
363 if (tbl[i] == NULL)
364 continue;
365
366 ptr = (struct pasid_state **)tbl[i];
367 free_pasid_states_level1(ptr);
368 }
369}
370
371static void free_pasid_states(struct device_state *dev_state)
372{
373 struct pasid_state *pasid_state;
374 int i;
375
376 for (i = 0; i < dev_state->max_pasids; ++i) {
377 pasid_state = get_pasid_state(dev_state, i);
378 if (pasid_state == NULL)
379 continue;
380
381 put_pasid_state(pasid_state);
382 unbind_pasid(dev_state, i);
383 }
384
385 if (dev_state->pasid_levels == 2)
386 free_pasid_states_level2(dev_state->states);
387 else if (dev_state->pasid_levels == 1)
388 free_pasid_states_level1(dev_state->states);
389 else if (dev_state->pasid_levels != 0)
390 BUG();
391
392 free_page((unsigned long)dev_state->states);
393}
394
395static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
396{
397 return container_of(mn, struct pasid_state, mn);
398}
399
400static void __mn_flush_page(struct mmu_notifier *mn,
401 unsigned long address)
402{
403 struct pasid_state *pasid_state;
404 struct device_state *dev_state;
405
406 pasid_state = mn_to_state(mn);
407 dev_state = pasid_state->device_state;
408
409 amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address);
410}
411
412static int mn_clear_flush_young(struct mmu_notifier *mn,
413 struct mm_struct *mm,
414 unsigned long address)
415{
416 __mn_flush_page(mn, address);
417
418 return 0;
419}
420
421static void mn_change_pte(struct mmu_notifier *mn,
422 struct mm_struct *mm,
423 unsigned long address,
424 pte_t pte)
425{
426 __mn_flush_page(mn, address);
427}
428
429static void mn_invalidate_page(struct mmu_notifier *mn,
430 struct mm_struct *mm,
431 unsigned long address)
432{
433 __mn_flush_page(mn, address);
434}
435
436static void mn_invalidate_range_start(struct mmu_notifier *mn,
437 struct mm_struct *mm,
438 unsigned long start, unsigned long end)
439{
440 struct pasid_state *pasid_state;
441 struct device_state *dev_state;
442
443 pasid_state = mn_to_state(mn);
444 dev_state = pasid_state->device_state;
445
446 amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
447 __pa(empty_page_table));
448}
449
450static void mn_invalidate_range_end(struct mmu_notifier *mn,
451 struct mm_struct *mm,
452 unsigned long start, unsigned long end)
453{
454 struct pasid_state *pasid_state;
455 struct device_state *dev_state;
456
457 pasid_state = mn_to_state(mn);
458 dev_state = pasid_state->device_state;
459
460 amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
461 __pa(pasid_state->mm->pgd));
462}
463
464static struct mmu_notifier_ops iommu_mn = {
465 .clear_flush_young = mn_clear_flush_young,
466 .change_pte = mn_change_pte,
467 .invalidate_page = mn_invalidate_page,
468 .invalidate_range_start = mn_invalidate_range_start,
469 .invalidate_range_end = mn_invalidate_range_end,
470};
471
472static void set_pri_tag_status(struct pasid_state *pasid_state,
473 u16 tag, int status)
474{
475 unsigned long flags;
476
477 spin_lock_irqsave(&pasid_state->lock, flags);
478 pasid_state->pri[tag].status = status;
479 spin_unlock_irqrestore(&pasid_state->lock, flags);
480}
481
482static void finish_pri_tag(struct device_state *dev_state,
483 struct pasid_state *pasid_state,
484 u16 tag)
485{
486 unsigned long flags;
487
488 spin_lock_irqsave(&pasid_state->lock, flags);
489 if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) &&
490 pasid_state->pri[tag].finish) {
491 amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid,
492 pasid_state->pri[tag].status, tag);
493 pasid_state->pri[tag].finish = false;
494 pasid_state->pri[tag].status = PPR_SUCCESS;
495 }
496 spin_unlock_irqrestore(&pasid_state->lock, flags);
497}
498
499static void do_fault(struct work_struct *work)
500{
501 struct fault *fault = container_of(work, struct fault, work);
502 int npages, write;
503 struct page *page;
504
505 write = !!(fault->flags & PPR_FAULT_WRITE);
506
507 npages = get_user_pages(fault->state->task, fault->state->mm,
508 fault->address, 1, write, 0, &page, NULL);
509
510 if (npages == 1) {
511 put_page(page);
512 } else if (fault->dev_state->inv_ppr_cb) {
513 int status;
514
515 status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev,
516 fault->pasid,
517 fault->address,
518 fault->flags);
519 switch (status) {
520 case AMD_IOMMU_INV_PRI_RSP_SUCCESS:
521 set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS);
522 break;
523 case AMD_IOMMU_INV_PRI_RSP_INVALID:
524 set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
525 break;
526 case AMD_IOMMU_INV_PRI_RSP_FAIL:
527 set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE);
528 break;
529 default:
530 BUG();
531 }
532 } else {
533 set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
534 }
535
536 finish_pri_tag(fault->dev_state, fault->state, fault->tag);
537
538 put_pasid_state(fault->state);
539
540 kfree(fault);
541}
542
543static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
544{
545 struct amd_iommu_fault *iommu_fault;
546 struct pasid_state *pasid_state;
547 struct device_state *dev_state;
548 unsigned long flags;
549 struct fault *fault;
550 bool finish;
551 u16 tag;
552 int ret;
553
554 iommu_fault = data;
555 tag = iommu_fault->tag & 0x1ff;
556 finish = (iommu_fault->tag >> 9) & 1;
557
558 ret = NOTIFY_DONE;
559 dev_state = get_device_state(iommu_fault->device_id);
560 if (dev_state == NULL)
561 goto out;
562
563 pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
564 if (pasid_state == NULL) {
565 /* We know the device but not the PASID -> send INVALID */
566 amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
567 PPR_INVALID, tag);
568 goto out_drop_state;
569 }
570
571 spin_lock_irqsave(&pasid_state->lock, flags);
572 atomic_inc(&pasid_state->pri[tag].inflight);
573 if (finish)
574 pasid_state->pri[tag].finish = true;
575 spin_unlock_irqrestore(&pasid_state->lock, flags);
576
577 fault = kzalloc(sizeof(*fault), GFP_ATOMIC);
578 if (fault == NULL) {
579 /* We are OOM - send success and let the device re-fault */
580 finish_pri_tag(dev_state, pasid_state, tag);
581 goto out_drop_state;
582 }
583
584 fault->dev_state = dev_state;
585 fault->address = iommu_fault->address;
586 fault->state = pasid_state;
587 fault->tag = tag;
588 fault->finish = finish;
589 fault->flags = iommu_fault->flags;
590 INIT_WORK(&fault->work, do_fault);
591
592 queue_work(iommu_wq, &fault->work);
593
594 ret = NOTIFY_OK;
595
596out_drop_state:
597 put_device_state(dev_state);
598
599out:
600 return ret;
601}
602
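Illustration (not part of the patch): ppr_notifier() above splits the 16-bit tag delivered with the fault into a 9-bit PRI tag and a "finish" bit (bit 9); when the finish bit is set, finish_pri_tag() sends a PPR completion once the in-flight work for that tag has drained. A tiny standalone sketch of the split, with an invented raw value:

/* Standalone sketch: decompose a raw PPR tag field the way
 * ppr_notifier() does. */
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

int main(void)
{
	uint16_t raw_tag = 0x234;               /* made-up value from a fault */
	uint16_t tag     = raw_tag & 0x1ff;     /* 0x034 */
	bool     finish  = (raw_tag >> 9) & 1;  /* true */

	printf("tag 0x%03x finish %d\n", (unsigned)tag, finish);
	return 0;
}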
603static struct notifier_block ppr_nb = {
604 .notifier_call = ppr_notifier,
605};
606
607static int task_exit(struct notifier_block *nb, unsigned long e, void *data)
608{
609 struct pasid_state *pasid_state;
610 struct task_struct *task;
611
612 task = data;
613
614 /*
615 * Using this notifier is a hack - but there is no other choice
616 * at the moment. What I really want is a sleeping notifier that
617 * is called when an MM goes down. But such a notifier doesn't
618 * exist yet. The notifier needs to sleep because it has to make
619 * sure that the device does not use the PASID and the address
620 * space anymore before it is destroyed. This includes waiting
621 * for pending PRI requests to pass the workqueue. The
622 * MMU-Notifiers would be a good fit, but they use RCU and so
 623 * they are not allowed to sleep. Let's see how we can solve this
624 * in a more intelligent way in the future.
625 */
626again:
627 spin_lock(&ps_lock);
628 list_for_each_entry(pasid_state, &pasid_state_list, list) {
629 struct device_state *dev_state;
630 int pasid;
631
632 if (pasid_state->task != task)
633 continue;
634
635 /* Drop Lock and unbind */
636 spin_unlock(&ps_lock);
637
638 dev_state = pasid_state->device_state;
639 pasid = pasid_state->pasid;
640
641 if (pasid_state->device_state->inv_ctx_cb)
642 dev_state->inv_ctx_cb(dev_state->pdev, pasid);
643
644 unbind_pasid(dev_state, pasid);
645
646 /* Task may be in the list multiple times */
647 goto again;
648 }
649 spin_unlock(&ps_lock);
650
651 return NOTIFY_OK;
652}
653
654int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
655 struct task_struct *task)
656{
657 struct pasid_state *pasid_state;
658 struct device_state *dev_state;
659 u16 devid;
660 int ret;
661
662 might_sleep();
663
664 if (!amd_iommu_v2_supported())
665 return -ENODEV;
666
667 devid = device_id(pdev);
668 dev_state = get_device_state(devid);
669
670 if (dev_state == NULL)
671 return -EINVAL;
672
673 ret = -EINVAL;
674 if (pasid < 0 || pasid >= dev_state->max_pasids)
675 goto out;
676
677 ret = -ENOMEM;
678 pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL);
679 if (pasid_state == NULL)
680 goto out;
681
682 atomic_set(&pasid_state->count, 1);
683 init_waitqueue_head(&pasid_state->wq);
684 pasid_state->task = task;
685 pasid_state->mm = get_task_mm(task);
686 pasid_state->device_state = dev_state;
687 pasid_state->pasid = pasid;
688 pasid_state->mn.ops = &iommu_mn;
689
690 if (pasid_state->mm == NULL)
691 goto out_free;
692
693 mmu_notifier_register(&pasid_state->mn, pasid_state->mm);
694
695 ret = set_pasid_state(dev_state, pasid_state, pasid);
696 if (ret)
697 goto out_unregister;
698
699 ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
700 __pa(pasid_state->mm->pgd));
701 if (ret)
702 goto out_clear_state;
703
704 link_pasid_state(pasid_state);
705
706 return 0;
707
708out_clear_state:
709 clear_pasid_state(dev_state, pasid);
710
711out_unregister:
712 mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
713
714out_free:
715 free_pasid_state(pasid_state);
716
717out:
718 put_device_state(dev_state);
719
720 return ret;
721}
722EXPORT_SYMBOL(amd_iommu_bind_pasid);
723
724void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
725{
726 struct device_state *dev_state;
727 u16 devid;
728
729 might_sleep();
730
731 if (!amd_iommu_v2_supported())
732 return;
733
734 devid = device_id(pdev);
735 dev_state = get_device_state(devid);
736 if (dev_state == NULL)
737 return;
738
739 if (pasid < 0 || pasid >= dev_state->max_pasids)
740 goto out;
741
742 unbind_pasid(dev_state, pasid);
743
744out:
745 put_device_state(dev_state);
746}
747EXPORT_SYMBOL(amd_iommu_unbind_pasid);
748
749int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
750{
751 struct device_state *dev_state;
752 unsigned long flags;
753 int ret, tmp;
754 u16 devid;
755
756 might_sleep();
757
758 if (!amd_iommu_v2_supported())
759 return -ENODEV;
760
761 if (pasids <= 0 || pasids > (PASID_MASK + 1))
762 return -EINVAL;
763
764 devid = device_id(pdev);
765
766 dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
767 if (dev_state == NULL)
768 return -ENOMEM;
769
770 spin_lock_init(&dev_state->lock);
771 init_waitqueue_head(&dev_state->wq);
772 dev_state->pdev = pdev;
773
774 tmp = pasids;
775 for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
776 dev_state->pasid_levels += 1;
777
778 atomic_set(&dev_state->count, 1);
779 dev_state->max_pasids = pasids;
780
781 ret = -ENOMEM;
782 dev_state->states = (void *)get_zeroed_page(GFP_KERNEL);
783 if (dev_state->states == NULL)
784 goto out_free_dev_state;
785
786 dev_state->domain = iommu_domain_alloc(&pci_bus_type);
787 if (dev_state->domain == NULL)
788 goto out_free_states;
789
790 amd_iommu_domain_direct_map(dev_state->domain);
791
792 ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
793 if (ret)
794 goto out_free_domain;
795
796 ret = iommu_attach_device(dev_state->domain, &pdev->dev);
797 if (ret != 0)
798 goto out_free_domain;
799
800 spin_lock_irqsave(&state_lock, flags);
801
802 if (state_table[devid] != NULL) {
803 spin_unlock_irqrestore(&state_lock, flags);
804 ret = -EBUSY;
805 goto out_free_domain;
806 }
807
808 state_table[devid] = dev_state;
809
810 spin_unlock_irqrestore(&state_lock, flags);
811
812 return 0;
813
814out_free_domain:
815 iommu_domain_free(dev_state->domain);
816
817out_free_states:
818 free_page((unsigned long)dev_state->states);
819
820out_free_dev_state:
821 kfree(dev_state);
822
823 return ret;
824}
825EXPORT_SYMBOL(amd_iommu_init_device);
826
827void amd_iommu_free_device(struct pci_dev *pdev)
828{
829 struct device_state *dev_state;
830 unsigned long flags;
831 u16 devid;
832
833 if (!amd_iommu_v2_supported())
834 return;
835
836 devid = device_id(pdev);
837
838 spin_lock_irqsave(&state_lock, flags);
839
840 dev_state = state_table[devid];
841 if (dev_state == NULL) {
842 spin_unlock_irqrestore(&state_lock, flags);
843 return;
844 }
845
846 state_table[devid] = NULL;
847
848 spin_unlock_irqrestore(&state_lock, flags);
849
850 /* Get rid of any remaining pasid states */
851 free_pasid_states(dev_state);
852
853 put_device_state_wait(dev_state);
854}
855EXPORT_SYMBOL(amd_iommu_free_device);
856
857int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
858 amd_iommu_invalid_ppr_cb cb)
859{
860 struct device_state *dev_state;
861 unsigned long flags;
862 u16 devid;
863 int ret;
864
865 if (!amd_iommu_v2_supported())
866 return -ENODEV;
867
868 devid = device_id(pdev);
869
870 spin_lock_irqsave(&state_lock, flags);
871
872 ret = -EINVAL;
873 dev_state = state_table[devid];
874 if (dev_state == NULL)
875 goto out_unlock;
876
877 dev_state->inv_ppr_cb = cb;
878
879 ret = 0;
880
881out_unlock:
882 spin_unlock_irqrestore(&state_lock, flags);
883
884 return ret;
885}
886EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);
887
888int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
889 amd_iommu_invalidate_ctx cb)
890{
891 struct device_state *dev_state;
892 unsigned long flags;
893 u16 devid;
894 int ret;
895
896 if (!amd_iommu_v2_supported())
897 return -ENODEV;
898
899 devid = device_id(pdev);
900
901 spin_lock_irqsave(&state_lock, flags);
902
903 ret = -EINVAL;
904 dev_state = state_table[devid];
905 if (dev_state == NULL)
906 goto out_unlock;
907
908 dev_state->inv_ctx_cb = cb;
909
910 ret = 0;
911
912out_unlock:
913 spin_unlock_irqrestore(&state_lock, flags);
914
915 return ret;
916}
917EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);
918
919static int __init amd_iommu_v2_init(void)
920{
921 size_t state_table_size;
922 int ret;
923
924 pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>");
925
926 spin_lock_init(&state_lock);
927
928 state_table_size = MAX_DEVICES * sizeof(struct device_state *);
929 state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
930 get_order(state_table_size));
931 if (state_table == NULL)
932 return -ENOMEM;
933
934 ret = -ENOMEM;
935 iommu_wq = create_workqueue("amd_iommu_v2");
936 if (iommu_wq == NULL)
937 goto out_free;
938
939 ret = -ENOMEM;
940 empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL);
941 if (empty_page_table == NULL)
942 goto out_destroy_wq;
943
944 amd_iommu_register_ppr_notifier(&ppr_nb);
945 profile_event_register(PROFILE_TASK_EXIT, &profile_nb);
946
947 return 0;
948
949out_destroy_wq:
950 destroy_workqueue(iommu_wq);
951
952out_free:
953 free_pages((unsigned long)state_table, get_order(state_table_size));
954
955 return ret;
956}
957
958static void __exit amd_iommu_v2_exit(void)
959{
960 struct device_state *dev_state;
961 size_t state_table_size;
962 int i;
963
964 profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb);
965 amd_iommu_unregister_ppr_notifier(&ppr_nb);
966
967 flush_workqueue(iommu_wq);
968
969 /*
970 * The loop below might call flush_workqueue(), so call
971 * destroy_workqueue() after it
972 */
973 for (i = 0; i < MAX_DEVICES; ++i) {
974 dev_state = get_device_state(i);
975
976 if (dev_state == NULL)
977 continue;
978
979 WARN_ON_ONCE(1);
980
981 put_device_state(dev_state);
982 amd_iommu_free_device(dev_state->pdev);
983 }
984
985 destroy_workqueue(iommu_wq);
986
987 state_table_size = MAX_DEVICES * sizeof(struct device_state *);
988 free_pages((unsigned long)state_table, get_order(state_table_size));
989
990 free_page((unsigned long)empty_page_table);
991}
992
993module_init(amd_iommu_v2_init);
994module_exit(amd_iommu_v2_exit);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 31053a951c3..c9c6053198d 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -79,6 +79,24 @@
79#define LEVEL_STRIDE (9) 79#define LEVEL_STRIDE (9)
80#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) 80#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
81 81
82/*
 83 * This bitmap is used to advertise the page sizes our hardware supports
84 * to the IOMMU core, which will then use this information to split
85 * physically contiguous memory regions it is mapping into page sizes
86 * that we support.
87 *
88 * Traditionally the IOMMU core just handed us the mappings directly,
89 * after making sure the size is an order of a 4KiB page and that the
90 * mapping has natural alignment.
91 *
92 * To retain this behavior, we currently advertise that we support
93 * all page sizes that are an order of 4KiB.
94 *
95 * If at some point we'd like to utilize the IOMMU core's new behavior,
96 * we could change this to advertise the real page sizes we support.
97 */
98#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
99
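Illustration (not part of the patch): with ~0xFFFUL every bit from 2^12 upward is set, so the reworked generic iommu_map() (see iommu.c later in this diff) derives a minimum page size of 4 KiB and will hand this driver any power-of-two chunk that is a multiple of 4 KiB, matching the old behaviour described above. A standalone sketch of that derivation:

/* Standalone sketch: what the generic layer derives from this bitmap.
 * The lowest set bit gives the smallest advertised page size. */
#include <stdio.h>

int main(void)
{
	unsigned long pgsize_bitmap = ~0xFFFUL;
	unsigned long min_pagesz = 1UL << __builtin_ctzl(pgsize_bitmap);

	printf("min page size: %lu\n", min_pagesz);   /* 4096 */
	return 0;
}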
82static inline int agaw_to_level(int agaw) 100static inline int agaw_to_level(int agaw)
83{ 101{
84 return agaw + 2; 102 return agaw + 2;
@@ -3979,12 +3997,11 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
3979 3997
3980static int intel_iommu_map(struct iommu_domain *domain, 3998static int intel_iommu_map(struct iommu_domain *domain,
3981 unsigned long iova, phys_addr_t hpa, 3999 unsigned long iova, phys_addr_t hpa,
3982 int gfp_order, int iommu_prot) 4000 size_t size, int iommu_prot)
3983{ 4001{
3984 struct dmar_domain *dmar_domain = domain->priv; 4002 struct dmar_domain *dmar_domain = domain->priv;
3985 u64 max_addr; 4003 u64 max_addr;
3986 int prot = 0; 4004 int prot = 0;
3987 size_t size;
3988 int ret; 4005 int ret;
3989 4006
3990 if (iommu_prot & IOMMU_READ) 4007 if (iommu_prot & IOMMU_READ)
@@ -3994,7 +4011,6 @@ static int intel_iommu_map(struct iommu_domain *domain,
3994 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) 4011 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3995 prot |= DMA_PTE_SNP; 4012 prot |= DMA_PTE_SNP;
3996 4013
3997 size = PAGE_SIZE << gfp_order;
3998 max_addr = iova + size; 4014 max_addr = iova + size;
3999 if (dmar_domain->max_addr < max_addr) { 4015 if (dmar_domain->max_addr < max_addr) {
4000 u64 end; 4016 u64 end;
@@ -4017,11 +4033,10 @@ static int intel_iommu_map(struct iommu_domain *domain,
4017 return ret; 4033 return ret;
4018} 4034}
4019 4035
4020static int intel_iommu_unmap(struct iommu_domain *domain, 4036static size_t intel_iommu_unmap(struct iommu_domain *domain,
4021 unsigned long iova, int gfp_order) 4037 unsigned long iova, size_t size)
4022{ 4038{
4023 struct dmar_domain *dmar_domain = domain->priv; 4039 struct dmar_domain *dmar_domain = domain->priv;
4024 size_t size = PAGE_SIZE << gfp_order;
4025 int order; 4040 int order;
4026 4041
4027 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, 4042 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
@@ -4030,7 +4045,7 @@ static int intel_iommu_unmap(struct iommu_domain *domain,
4030 if (dmar_domain->max_addr == iova + size) 4045 if (dmar_domain->max_addr == iova + size)
4031 dmar_domain->max_addr = iova; 4046 dmar_domain->max_addr = iova;
4032 4047
4033 return order; 4048 return PAGE_SIZE << order;
4034} 4049}
4035 4050
4036static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 4051static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -4060,6 +4075,54 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4060 return 0; 4075 return 0;
4061} 4076}
4062 4077
4078/*
 4079 * Group numbers are arbitrary. Devices with the same group number
 4080 * indicate that the iommu cannot differentiate between them. To avoid
 4081 * tracking used groups we just use the seg|bus|devfn of the lowest
 4082 * level at which we're able to differentiate devices.
4083 */
4084static int intel_iommu_device_group(struct device *dev, unsigned int *groupid)
4085{
4086 struct pci_dev *pdev = to_pci_dev(dev);
4087 struct pci_dev *bridge;
4088 union {
4089 struct {
4090 u8 devfn;
4091 u8 bus;
4092 u16 segment;
4093 } pci;
4094 u32 group;
4095 } id;
4096
4097 if (iommu_no_mapping(dev))
4098 return -ENODEV;
4099
4100 id.pci.segment = pci_domain_nr(pdev->bus);
4101 id.pci.bus = pdev->bus->number;
4102 id.pci.devfn = pdev->devfn;
4103
4104 if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn))
4105 return -ENODEV;
4106
4107 bridge = pci_find_upstream_pcie_bridge(pdev);
4108 if (bridge) {
4109 if (pci_is_pcie(bridge)) {
4110 id.pci.bus = bridge->subordinate->number;
4111 id.pci.devfn = 0;
4112 } else {
4113 id.pci.bus = bridge->bus->number;
4114 id.pci.devfn = bridge->devfn;
4115 }
4116 }
4117
4118 if (!pdev->is_virtfn && iommu_group_mf)
4119 id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0);
4120
4121 *groupid = id.group;
4122
4123 return 0;
4124}
4125
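Illustration (not part of the patch): the union above makes the packing explicit - devfn in the low byte, bus above it, PCI segment in the upper 16 bits - so id.group becomes a single comparable 32-bit token; when iommu_group_mf is set (and the device is not a virtual function), the function bits are zeroed so that all functions of a slot share one group. A standalone sketch of the packing with made-up numbers:

/* Standalone sketch of the group-id packing used above.  The token is
 * only ever compared for equality, so the byte order does not matter
 * beyond being consistent. */
#include <stdio.h>
#include <stdint.h>

union group_id {
	struct {
		uint8_t  devfn;
		uint8_t  bus;
		uint16_t segment;
	} pci;
	uint32_t group;
};

int main(void)
{
	union group_id id = { .pci = { .devfn = 0x11, .bus = 0x3a, .segment = 0 } };

	printf("group id: 0x%08x\n", id.group);   /* 0x00003a11 on little-endian */
	return 0;
}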
4063static struct iommu_ops intel_iommu_ops = { 4126static struct iommu_ops intel_iommu_ops = {
4064 .domain_init = intel_iommu_domain_init, 4127 .domain_init = intel_iommu_domain_init,
4065 .domain_destroy = intel_iommu_domain_destroy, 4128 .domain_destroy = intel_iommu_domain_destroy,
@@ -4069,6 +4132,8 @@ static struct iommu_ops intel_iommu_ops = {
4069 .unmap = intel_iommu_unmap, 4132 .unmap = intel_iommu_unmap,
4070 .iova_to_phys = intel_iommu_iova_to_phys, 4133 .iova_to_phys = intel_iommu_iova_to_phys,
4071 .domain_has_cap = intel_iommu_domain_has_cap, 4134 .domain_has_cap = intel_iommu_domain_has_cap,
4135 .device_group = intel_iommu_device_group,
4136 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
4072}; 4137};
4073 4138
4074static void __devinit quirk_iommu_rwbf(struct pci_dev *dev) 4139static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 5b5fa5cdaa3..2198b2dbbcd 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -16,6 +16,8 @@
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
19#define pr_fmt(fmt) "%s: " fmt, __func__
20
19#include <linux/device.h> 21#include <linux/device.h>
20#include <linux/kernel.h> 22#include <linux/kernel.h>
21#include <linux/bug.h> 23#include <linux/bug.h>
@@ -25,8 +27,59 @@
25#include <linux/errno.h> 27#include <linux/errno.h>
26#include <linux/iommu.h> 28#include <linux/iommu.h>
27 29
30static ssize_t show_iommu_group(struct device *dev,
31 struct device_attribute *attr, char *buf)
32{
33 unsigned int groupid;
34
35 if (iommu_device_group(dev, &groupid))
36 return 0;
37
38 return sprintf(buf, "%u", groupid);
39}
40static DEVICE_ATTR(iommu_group, S_IRUGO, show_iommu_group, NULL);
41
42static int add_iommu_group(struct device *dev, void *data)
43{
44 unsigned int groupid;
45
46 if (iommu_device_group(dev, &groupid) == 0)
47 return device_create_file(dev, &dev_attr_iommu_group);
48
49 return 0;
50}
51
52static int remove_iommu_group(struct device *dev)
53{
54 unsigned int groupid;
55
56 if (iommu_device_group(dev, &groupid) == 0)
57 device_remove_file(dev, &dev_attr_iommu_group);
58
59 return 0;
60}
61
62static int iommu_device_notifier(struct notifier_block *nb,
63 unsigned long action, void *data)
64{
65 struct device *dev = data;
66
67 if (action == BUS_NOTIFY_ADD_DEVICE)
68 return add_iommu_group(dev, NULL);
69 else if (action == BUS_NOTIFY_DEL_DEVICE)
70 return remove_iommu_group(dev);
71
72 return 0;
73}
74
75static struct notifier_block iommu_device_nb = {
76 .notifier_call = iommu_device_notifier,
77};
78
28static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops) 79static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops)
29{ 80{
81 bus_register_notifier(bus, &iommu_device_nb);
82 bus_for_each_dev(bus, NULL, NULL, add_iommu_group);
30} 83}
31 84
32/** 85/**
@@ -157,32 +210,134 @@ int iommu_domain_has_cap(struct iommu_domain *domain,
157EXPORT_SYMBOL_GPL(iommu_domain_has_cap); 210EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
158 211
159int iommu_map(struct iommu_domain *domain, unsigned long iova, 212int iommu_map(struct iommu_domain *domain, unsigned long iova,
160 phys_addr_t paddr, int gfp_order, int prot) 213 phys_addr_t paddr, size_t size, int prot)
161{ 214{
162 size_t size; 215 unsigned long orig_iova = iova;
216 unsigned int min_pagesz;
217 size_t orig_size = size;
218 int ret = 0;
163 219
164 if (unlikely(domain->ops->map == NULL)) 220 if (unlikely(domain->ops->map == NULL))
165 return -ENODEV; 221 return -ENODEV;
166 222
167 size = PAGE_SIZE << gfp_order; 223 /* find out the minimum page size supported */
224 min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
225
226 /*
227 * both the virtual address and the physical one, as well as
228 * the size of the mapping, must be aligned (at least) to the
229 * size of the smallest page supported by the hardware
230 */
231 if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
232 pr_err("unaligned: iova 0x%lx pa 0x%lx size 0x%lx min_pagesz "
233 "0x%x\n", iova, (unsigned long)paddr,
234 (unsigned long)size, min_pagesz);
235 return -EINVAL;
236 }
237
238 pr_debug("map: iova 0x%lx pa 0x%lx size 0x%lx\n", iova,
239 (unsigned long)paddr, (unsigned long)size);
240
241 while (size) {
242 unsigned long pgsize, addr_merge = iova | paddr;
243 unsigned int pgsize_idx;
244
245 /* Max page size that still fits into 'size' */
246 pgsize_idx = __fls(size);
247
248 /* need to consider alignment requirements ? */
249 if (likely(addr_merge)) {
250 /* Max page size allowed by both iova and paddr */
251 unsigned int align_pgsize_idx = __ffs(addr_merge);
252
253 pgsize_idx = min(pgsize_idx, align_pgsize_idx);
254 }
255
256 /* build a mask of acceptable page sizes */
257 pgsize = (1UL << (pgsize_idx + 1)) - 1;
168 258
169 BUG_ON(!IS_ALIGNED(iova | paddr, size)); 259 /* throw away page sizes not supported by the hardware */
260 pgsize &= domain->ops->pgsize_bitmap;
170 261
171 return domain->ops->map(domain, iova, paddr, gfp_order, prot); 262 /* make sure we're still sane */
263 BUG_ON(!pgsize);
264
265 /* pick the biggest page */
266 pgsize_idx = __fls(pgsize);
267 pgsize = 1UL << pgsize_idx;
268
269 pr_debug("mapping: iova 0x%lx pa 0x%lx pgsize %lu\n", iova,
270 (unsigned long)paddr, pgsize);
271
272 ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
273 if (ret)
274 break;
275
276 iova += pgsize;
277 paddr += pgsize;
278 size -= pgsize;
279 }
280
281 /* unroll mapping in case something went wrong */
282 if (ret)
283 iommu_unmap(domain, orig_iova, orig_size - size);
284
285 return ret;
172} 286}
173EXPORT_SYMBOL_GPL(iommu_map); 287EXPORT_SYMBOL_GPL(iommu_map);
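Illustration (not part of the patch): the core of the new iommu_map() is the per-iteration page-size choice - the largest power of two that fits in the remaining size, respects the alignment of iova | paddr, and is advertised in pgsize_bitmap. A standalone sketch of just that selection, with __fls()/__ffs() replaced by compiler builtins and made-up inputs:

/* Standalone sketch of the page-size selection inside the loop above.
 * Assumes size and the resulting candidate mask are non-zero, as the
 * real code does (it BUG()s otherwise). */
#include <stdio.h>

static unsigned long pick_pgsize(unsigned long iova, unsigned long paddr,
				 unsigned long size, unsigned long pgsize_bitmap)
{
	unsigned long addr_merge = iova | paddr;
	unsigned int pgsize_idx = (8 * sizeof(unsigned long) - 1) -
				  __builtin_clzl(size);          /* __fls(size) */
	unsigned long pgsize;

	if (addr_merge) {
		unsigned int align_idx = __builtin_ctzl(addr_merge);  /* __ffs() */
		if (align_idx < pgsize_idx)
			pgsize_idx = align_idx;
	}

	pgsize = (1UL << (pgsize_idx + 1)) - 1;   /* all sizes that fit */
	pgsize &= pgsize_bitmap;                  /* keep supported ones */
	pgsize_idx = (8 * sizeof(unsigned long) - 1) - __builtin_clzl(pgsize);

	return 1UL << pgsize_idx;                 /* biggest remaining size */
}

int main(void)
{
	/* e.g. a 2 MiB-aligned 3 MiB mapping on hardware with 4K/64K/1M/16M pages */
	unsigned long pgsize = pick_pgsize(0x40200000, 0x80200000, 0x300000,
					   0x1000 | 0x10000 | 0x100000 | 0x1000000);

	printf("first chunk: 0x%lx\n", pgsize);   /* 0x100000 (1 MiB) */
	return 0;
}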
174 288
175int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order) 289size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
176{ 290{
177 size_t size; 291 size_t unmapped_page, unmapped = 0;
292 unsigned int min_pagesz;
178 293
179 if (unlikely(domain->ops->unmap == NULL)) 294 if (unlikely(domain->ops->unmap == NULL))
180 return -ENODEV; 295 return -ENODEV;
181 296
182 size = PAGE_SIZE << gfp_order; 297 /* find out the minimum page size supported */
298 min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
299
300 /*
301 * The virtual address, as well as the size of the mapping, must be
302 * aligned (at least) to the size of the smallest page supported
303 * by the hardware
304 */
305 if (!IS_ALIGNED(iova | size, min_pagesz)) {
306 pr_err("unaligned: iova 0x%lx size 0x%lx min_pagesz 0x%x\n",
307 iova, (unsigned long)size, min_pagesz);
308 return -EINVAL;
309 }
310
311 pr_debug("unmap this: iova 0x%lx size 0x%lx\n", iova,
312 (unsigned long)size);
313
314 /*
315 * Keep iterating until we either unmap 'size' bytes (or more)
316 * or we hit an area that isn't mapped.
317 */
318 while (unmapped < size) {
319 size_t left = size - unmapped;
320
321 unmapped_page = domain->ops->unmap(domain, iova, left);
322 if (!unmapped_page)
323 break;
324
325 pr_debug("unmapped: iova 0x%lx size %lx\n", iova,
326 (unsigned long)unmapped_page);
327
328 iova += unmapped_page;
329 unmapped += unmapped_page;
330 }
331
332 return unmapped;
333}
334EXPORT_SYMBOL_GPL(iommu_unmap);
183 335
184 BUG_ON(!IS_ALIGNED(iova, size)); 336int iommu_device_group(struct device *dev, unsigned int *groupid)
337{
338 if (iommu_present(dev->bus) && dev->bus->iommu_ops->device_group)
339 return dev->bus->iommu_ops->device_group(dev, groupid);
185 340
186 return domain->ops->unmap(domain, iova, gfp_order); 341 return -ENODEV;
187} 342}
188EXPORT_SYMBOL_GPL(iommu_unmap); 343EXPORT_SYMBOL_GPL(iommu_device_group);
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 5865dd2e28f..08a90b88e40 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -42,6 +42,9 @@ __asm__ __volatile__ ( \
42#define RCP15_PRRR(reg) MRC(reg, p15, 0, c10, c2, 0) 42#define RCP15_PRRR(reg) MRC(reg, p15, 0, c10, c2, 0)
43#define RCP15_NMRR(reg) MRC(reg, p15, 0, c10, c2, 1) 43#define RCP15_NMRR(reg) MRC(reg, p15, 0, c10, c2, 1)
44 44
45/* bitmap of the page sizes currently supported */
46#define MSM_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M)
47
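Illustration (not part of the patch): unlike the Intel driver's "every multiple of 4 KiB" mask, MSM (and OMAP further down) advertise exactly the four page sizes the hardware supports, so the generic iommu_map() will only ever hand the driver chunks of those sizes. A standalone sketch of the resulting bitmap, with the SZ_* constants restated:

/* Standalone sketch: the discrete page-size bitmap these SZ_* constants
 * produce, and the minimum size the generic layer derives from it. */
#include <stdio.h>

#define SZ_4K   0x00001000
#define SZ_64K  0x00010000
#define SZ_1M   0x00100000
#define SZ_16M  0x01000000

int main(void)
{
	unsigned long pgsizes = SZ_4K | SZ_64K | SZ_1M | SZ_16M;

	printf("bitmap 0x%08lx, min page 0x%lx\n",
	       pgsizes, 1UL << __builtin_ctzl(pgsizes));  /* 0x01111000, 0x1000 */
	return 0;
}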
45static int msm_iommu_tex_class[4]; 48static int msm_iommu_tex_class[4];
46 49
47DEFINE_SPINLOCK(msm_iommu_lock); 50DEFINE_SPINLOCK(msm_iommu_lock);
@@ -352,7 +355,7 @@ fail:
352} 355}
353 356
354static int msm_iommu_map(struct iommu_domain *domain, unsigned long va, 357static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
355 phys_addr_t pa, int order, int prot) 358 phys_addr_t pa, size_t len, int prot)
356{ 359{
357 struct msm_priv *priv; 360 struct msm_priv *priv;
358 unsigned long flags; 361 unsigned long flags;
@@ -363,7 +366,6 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
363 unsigned long *sl_pte; 366 unsigned long *sl_pte;
364 unsigned long sl_offset; 367 unsigned long sl_offset;
365 unsigned int pgprot; 368 unsigned int pgprot;
366 size_t len = 0x1000UL << order;
367 int ret = 0, tex, sh; 369 int ret = 0, tex, sh;
368 370
369 spin_lock_irqsave(&msm_iommu_lock, flags); 371 spin_lock_irqsave(&msm_iommu_lock, flags);
@@ -463,8 +465,8 @@ fail:
463 return ret; 465 return ret;
464} 466}
465 467
466static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va, 468static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
467 int order) 469 size_t len)
468{ 470{
469 struct msm_priv *priv; 471 struct msm_priv *priv;
470 unsigned long flags; 472 unsigned long flags;
@@ -474,7 +476,6 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
474 unsigned long *sl_table; 476 unsigned long *sl_table;
475 unsigned long *sl_pte; 477 unsigned long *sl_pte;
476 unsigned long sl_offset; 478 unsigned long sl_offset;
477 size_t len = 0x1000UL << order;
478 int i, ret = 0; 479 int i, ret = 0;
479 480
480 spin_lock_irqsave(&msm_iommu_lock, flags); 481 spin_lock_irqsave(&msm_iommu_lock, flags);
@@ -544,15 +545,12 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
544 545
545 ret = __flush_iotlb(domain); 546 ret = __flush_iotlb(domain);
546 547
547 /*
548 * the IOMMU API requires us to return the order of the unmapped
549 * page (on success).
550 */
551 if (!ret)
552 ret = order;
553fail: 548fail:
554 spin_unlock_irqrestore(&msm_iommu_lock, flags); 549 spin_unlock_irqrestore(&msm_iommu_lock, flags);
555 return ret; 550
551 /* the IOMMU API requires us to return how many bytes were unmapped */
552 len = ret ? 0 : len;
553 return len;
556} 554}
557 555
558static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain, 556static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -684,7 +682,8 @@ static struct iommu_ops msm_iommu_ops = {
684 .map = msm_iommu_map, 682 .map = msm_iommu_map,
685 .unmap = msm_iommu_unmap, 683 .unmap = msm_iommu_unmap,
686 .iova_to_phys = msm_iommu_iova_to_phys, 684 .iova_to_phys = msm_iommu_iova_to_phys,
687 .domain_has_cap = msm_iommu_domain_has_cap 685 .domain_has_cap = msm_iommu_domain_has_cap,
686 .pgsize_bitmap = MSM_IOMMU_PGSIZES,
688}; 687};
689 688
690static int __init get_tex_class(int icp, int ocp, int mt, int nos) 689static int __init get_tex_class(int icp, int ocp, int mt, int nos)
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 8f32b2bf758..d8edd979d01 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -33,6 +33,9 @@
33 (__i < (n)) && (cr = __iotlb_read_cr((obj), __i), true); \ 33 (__i < (n)) && (cr = __iotlb_read_cr((obj), __i), true); \
34 __i++) 34 __i++)
35 35
36/* bitmap of the page sizes currently supported */
37#define OMAP_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M)
38
36/** 39/**
37 * struct omap_iommu_domain - omap iommu domain 40 * struct omap_iommu_domain - omap iommu domain
38 * @pgtable: the page table 41 * @pgtable: the page table
@@ -86,20 +89,24 @@ EXPORT_SYMBOL_GPL(omap_uninstall_iommu_arch);
86 89
87/** 90/**
88 * omap_iommu_save_ctx - Save registers for pm off-mode support 91 * omap_iommu_save_ctx - Save registers for pm off-mode support
89 * @obj: target iommu 92 * @dev: client device
90 **/ 93 **/
91void omap_iommu_save_ctx(struct omap_iommu *obj) 94void omap_iommu_save_ctx(struct device *dev)
92{ 95{
96 struct omap_iommu *obj = dev_to_omap_iommu(dev);
97
93 arch_iommu->save_ctx(obj); 98 arch_iommu->save_ctx(obj);
94} 99}
95EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); 100EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
96 101
97/** 102/**
98 * omap_iommu_restore_ctx - Restore registers for pm off-mode support 103 * omap_iommu_restore_ctx - Restore registers for pm off-mode support
99 * @obj: target iommu 104 * @dev: client device
100 **/ 105 **/
101void omap_iommu_restore_ctx(struct omap_iommu *obj) 106void omap_iommu_restore_ctx(struct device *dev)
102{ 107{
108 struct omap_iommu *obj = dev_to_omap_iommu(dev);
109
103 arch_iommu->restore_ctx(obj); 110 arch_iommu->restore_ctx(obj);
104} 111}
105EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); 112EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx);
@@ -820,35 +827,23 @@ static int device_match_by_alias(struct device *dev, void *data)
820} 827}
821 828
822/** 829/**
823 * omap_find_iommu_device() - find an omap iommu device by name
824 * @name: name of the iommu device
825 *
826 * The generic iommu API requires the caller to provide the device
827 * he wishes to attach to a certain iommu domain.
828 *
829 * Drivers generally should not bother with this as it should just
830 * be taken care of by the DMA-API using dev_archdata.
831 *
832 * This function is provided as an interim solution until the latter
833 * materializes, and omap3isp is fully migrated to the DMA-API.
834 */
835struct device *omap_find_iommu_device(const char *name)
836{
837 return driver_find_device(&omap_iommu_driver.driver, NULL,
838 (void *)name,
839 device_match_by_alias);
840}
841EXPORT_SYMBOL_GPL(omap_find_iommu_device);
842
843/**
844 * omap_iommu_attach() - attach iommu device to an iommu domain 830 * omap_iommu_attach() - attach iommu device to an iommu domain
845 * @dev: target omap iommu device 831 * @name: name of target omap iommu device
846 * @iopgd: page table 832 * @iopgd: page table
847 **/ 833 **/
848static struct omap_iommu *omap_iommu_attach(struct device *dev, u32 *iopgd) 834static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd)
849{ 835{
850 int err = -ENOMEM; 836 int err = -ENOMEM;
851 struct omap_iommu *obj = to_iommu(dev); 837 struct device *dev;
838 struct omap_iommu *obj;
839
840 dev = driver_find_device(&omap_iommu_driver.driver, NULL,
841 (void *)name,
842 device_match_by_alias);
843 if (!dev)
844 return NULL;
845
846 obj = to_iommu(dev);
852 847
853 spin_lock(&obj->iommu_lock); 848 spin_lock(&obj->iommu_lock);
854 849
@@ -1019,12 +1014,11 @@ static void iopte_cachep_ctor(void *iopte)
1019} 1014}
1020 1015
1021static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, 1016static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
1022 phys_addr_t pa, int order, int prot) 1017 phys_addr_t pa, size_t bytes, int prot)
1023{ 1018{
1024 struct omap_iommu_domain *omap_domain = domain->priv; 1019 struct omap_iommu_domain *omap_domain = domain->priv;
1025 struct omap_iommu *oiommu = omap_domain->iommu_dev; 1020 struct omap_iommu *oiommu = omap_domain->iommu_dev;
1026 struct device *dev = oiommu->dev; 1021 struct device *dev = oiommu->dev;
1027 size_t bytes = PAGE_SIZE << order;
1028 struct iotlb_entry e; 1022 struct iotlb_entry e;
1029 int omap_pgsz; 1023 int omap_pgsz;
1030 u32 ret, flags; 1024 u32 ret, flags;
@@ -1049,19 +1043,16 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
1049 return ret; 1043 return ret;
1050} 1044}
1051 1045
1052static int omap_iommu_unmap(struct iommu_domain *domain, unsigned long da, 1046static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
1053 int order) 1047 size_t size)
1054{ 1048{
1055 struct omap_iommu_domain *omap_domain = domain->priv; 1049 struct omap_iommu_domain *omap_domain = domain->priv;
1056 struct omap_iommu *oiommu = omap_domain->iommu_dev; 1050 struct omap_iommu *oiommu = omap_domain->iommu_dev;
1057 struct device *dev = oiommu->dev; 1051 struct device *dev = oiommu->dev;
1058 size_t unmap_size;
1059
1060 dev_dbg(dev, "unmapping da 0x%lx order %d\n", da, order);
1061 1052
1062 unmap_size = iopgtable_clear_entry(oiommu, da); 1053 dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size);
1063 1054
1064 return unmap_size ? get_order(unmap_size) : -EINVAL; 1055 return iopgtable_clear_entry(oiommu, da);
1065} 1056}
1066 1057
1067static int 1058static int
@@ -1069,6 +1060,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
1069{ 1060{
1070 struct omap_iommu_domain *omap_domain = domain->priv; 1061 struct omap_iommu_domain *omap_domain = domain->priv;
1071 struct omap_iommu *oiommu; 1062 struct omap_iommu *oiommu;
1063 struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
1072 int ret = 0; 1064 int ret = 0;
1073 1065
1074 spin_lock(&omap_domain->lock); 1066 spin_lock(&omap_domain->lock);
@@ -1081,14 +1073,14 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
1081 } 1073 }
1082 1074
1083 /* get a handle to and enable the omap iommu */ 1075 /* get a handle to and enable the omap iommu */
1084 oiommu = omap_iommu_attach(dev, omap_domain->pgtable); 1076 oiommu = omap_iommu_attach(arch_data->name, omap_domain->pgtable);
1085 if (IS_ERR(oiommu)) { 1077 if (IS_ERR(oiommu)) {
1086 ret = PTR_ERR(oiommu); 1078 ret = PTR_ERR(oiommu);
1087 dev_err(dev, "can't get omap iommu: %d\n", ret); 1079 dev_err(dev, "can't get omap iommu: %d\n", ret);
1088 goto out; 1080 goto out;
1089 } 1081 }
1090 1082
1091 omap_domain->iommu_dev = oiommu; 1083 omap_domain->iommu_dev = arch_data->iommu_dev = oiommu;
1092 oiommu->domain = domain; 1084 oiommu->domain = domain;
1093 1085
1094out: 1086out:
@@ -1100,7 +1092,8 @@ static void omap_iommu_detach_dev(struct iommu_domain *domain,
1100 struct device *dev) 1092 struct device *dev)
1101{ 1093{
1102 struct omap_iommu_domain *omap_domain = domain->priv; 1094 struct omap_iommu_domain *omap_domain = domain->priv;
1103 struct omap_iommu *oiommu = to_iommu(dev); 1095 struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
1096 struct omap_iommu *oiommu = dev_to_omap_iommu(dev);
1104 1097
1105 spin_lock(&omap_domain->lock); 1098 spin_lock(&omap_domain->lock);
1106 1099
@@ -1114,7 +1107,7 @@ static void omap_iommu_detach_dev(struct iommu_domain *domain,
1114 1107
1115 omap_iommu_detach(oiommu); 1108 omap_iommu_detach(oiommu);
1116 1109
1117 omap_domain->iommu_dev = NULL; 1110 omap_domain->iommu_dev = arch_data->iommu_dev = NULL;
1118 1111
1119out: 1112out:
1120 spin_unlock(&omap_domain->lock); 1113 spin_unlock(&omap_domain->lock);
@@ -1183,14 +1176,14 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,
1183 else if (iopte_is_large(*pte)) 1176 else if (iopte_is_large(*pte))
1184 ret = omap_iommu_translate(*pte, da, IOLARGE_MASK); 1177 ret = omap_iommu_translate(*pte, da, IOLARGE_MASK);
1185 else 1178 else
1186 dev_err(dev, "bogus pte 0x%x", *pte); 1179 dev_err(dev, "bogus pte 0x%x, da 0x%lx", *pte, da);
1187 } else { 1180 } else {
1188 if (iopgd_is_section(*pgd)) 1181 if (iopgd_is_section(*pgd))
1189 ret = omap_iommu_translate(*pgd, da, IOSECTION_MASK); 1182 ret = omap_iommu_translate(*pgd, da, IOSECTION_MASK);
1190 else if (iopgd_is_super(*pgd)) 1183 else if (iopgd_is_super(*pgd))
1191 ret = omap_iommu_translate(*pgd, da, IOSUPER_MASK); 1184 ret = omap_iommu_translate(*pgd, da, IOSUPER_MASK);
1192 else 1185 else
1193 dev_err(dev, "bogus pgd 0x%x", *pgd); 1186 dev_err(dev, "bogus pgd 0x%x, da 0x%lx", *pgd, da);
1194 } 1187 }
1195 1188
1196 return ret; 1189 return ret;
@@ -1211,6 +1204,7 @@ static struct iommu_ops omap_iommu_ops = {
1211 .unmap = omap_iommu_unmap, 1204 .unmap = omap_iommu_unmap,
1212 .iova_to_phys = omap_iommu_iova_to_phys, 1205 .iova_to_phys = omap_iommu_iova_to_phys,
1213 .domain_has_cap = omap_iommu_domain_has_cap, 1206 .domain_has_cap = omap_iommu_domain_has_cap,
1207 .pgsize_bitmap = OMAP_IOMMU_PGSIZES,
1214}; 1208};
1215 1209
1216static int __init omap_iommu_init(void) 1210static int __init omap_iommu_init(void)
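For illustration (not part of the patch itself): a minimal sketch of how a caller uses the reworked IOMMU core interface after this change, where map/unmap take a size in bytes (split internally against the driver's advertised pgsize_bitmap) and unmap returns the number of bytes actually unmapped. Function and variable names below are placeholders, assuming a single PAGE_SIZE mapping.

/* Illustrative sketch only -- not taken from the patch. */
#include <linux/iommu.h>
#include <linux/mm.h>

static int example_map_one_page(struct iommu_domain *domain,
                                unsigned long da, phys_addr_t pa)
{
        int ret;

        /* Sizes are now passed in bytes; the IOMMU core splits the
         * request according to the driver's pgsize_bitmap. */
        ret = iommu_map(domain, da, pa, PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
        if (ret)
                return ret;

        /* iommu_unmap() now returns how many bytes were actually unmapped. */
        if (iommu_unmap(domain, da, PAGE_SIZE) != PAGE_SIZE)
                return -EINVAL;

        return 0;
}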
diff --git a/drivers/iommu/omap-iovmm.c b/drivers/iommu/omap-iovmm.c
index 46be456fcc0..2e10c3e0a7a 100644
--- a/drivers/iommu/omap-iovmm.c
+++ b/drivers/iommu/omap-iovmm.c
@@ -231,12 +231,14 @@ static struct iovm_struct *__find_iovm_area(struct omap_iommu *obj,
231 231
232/** 232/**
233 * omap_find_iovm_area - find iovma which includes @da 233 * omap_find_iovm_area - find iovma which includes @da
234 * @dev: client device
234 * @da: iommu device virtual address 235 * @da: iommu device virtual address
235 * 236 *
236 * Find the existing iovma starting at @da 237 * Find the existing iovma starting at @da
237 */ 238 */
238struct iovm_struct *omap_find_iovm_area(struct omap_iommu *obj, u32 da) 239struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da)
239{ 240{
241 struct omap_iommu *obj = dev_to_omap_iommu(dev);
240 struct iovm_struct *area; 242 struct iovm_struct *area;
241 243
242 mutex_lock(&obj->mmap_lock); 244 mutex_lock(&obj->mmap_lock);
@@ -343,14 +345,15 @@ static void free_iovm_area(struct omap_iommu *obj, struct iovm_struct *area)
343 345
344/** 346/**
345 * omap_da_to_va - convert (d) to (v) 347 * omap_da_to_va - convert (d) to (v)
346 * @obj: objective iommu 348 * @dev: client device
347 * @da: iommu device virtual address 349 * @da: iommu device virtual address
348 * @va: mpu virtual address 350 * @va: mpu virtual address
349 * 351 *
350 * Returns mpu virtual addr which corresponds to a given device virtual addr 352 * Returns mpu virtual addr which corresponds to a given device virtual addr
351 */ 353 */
352void *omap_da_to_va(struct omap_iommu *obj, u32 da) 354void *omap_da_to_va(struct device *dev, u32 da)
353{ 355{
356 struct omap_iommu *obj = dev_to_omap_iommu(dev);
354 void *va = NULL; 357 void *va = NULL;
355 struct iovm_struct *area; 358 struct iovm_struct *area;
356 359
@@ -410,7 +413,6 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
410 unsigned int i, j; 413 unsigned int i, j;
411 struct scatterlist *sg; 414 struct scatterlist *sg;
412 u32 da = new->da_start; 415 u32 da = new->da_start;
413 int order;
414 416
415 if (!domain || !sgt) 417 if (!domain || !sgt)
416 return -EINVAL; 418 return -EINVAL;
@@ -429,12 +431,10 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
429 if (bytes_to_iopgsz(bytes) < 0) 431 if (bytes_to_iopgsz(bytes) < 0)
430 goto err_out; 432 goto err_out;
431 433
432 order = get_order(bytes);
433
434 pr_debug("%s: [%d] %08x %08x(%x)\n", __func__, 434 pr_debug("%s: [%d] %08x %08x(%x)\n", __func__,
435 i, da, pa, bytes); 435 i, da, pa, bytes);
436 436
437 err = iommu_map(domain, da, pa, order, flags); 437 err = iommu_map(domain, da, pa, bytes, flags);
438 if (err) 438 if (err)
439 goto err_out; 439 goto err_out;
440 440
@@ -449,10 +449,9 @@ err_out:
449 size_t bytes; 449 size_t bytes;
450 450
451 bytes = sg->length + sg->offset; 451 bytes = sg->length + sg->offset;
452 order = get_order(bytes);
453 452
454 /* ignore failures.. we're already handling one */ 453 /* ignore failures.. we're already handling one */
455 iommu_unmap(domain, da, order); 454 iommu_unmap(domain, da, bytes);
456 455
457 da += bytes; 456 da += bytes;
458 } 457 }
@@ -467,7 +466,8 @@ static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
467 size_t total = area->da_end - area->da_start; 466 size_t total = area->da_end - area->da_start;
468 const struct sg_table *sgt = area->sgt; 467 const struct sg_table *sgt = area->sgt;
469 struct scatterlist *sg; 468 struct scatterlist *sg;
470 int i, err; 469 int i;
470 size_t unmapped;
471 471
472 BUG_ON(!sgtable_ok(sgt)); 472 BUG_ON(!sgtable_ok(sgt));
473 BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE)); 473 BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE));
@@ -475,13 +475,11 @@ static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
475 start = area->da_start; 475 start = area->da_start;
476 for_each_sg(sgt->sgl, sg, sgt->nents, i) { 476 for_each_sg(sgt->sgl, sg, sgt->nents, i) {
477 size_t bytes; 477 size_t bytes;
478 int order;
479 478
480 bytes = sg->length + sg->offset; 479 bytes = sg->length + sg->offset;
481 order = get_order(bytes);
482 480
483 err = iommu_unmap(domain, start, order); 481 unmapped = iommu_unmap(domain, start, bytes);
484 if (err < 0) 482 if (unmapped < bytes)
485 break; 483 break;
486 484
487 dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n", 485 dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n",
@@ -582,16 +580,18 @@ __iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj,
582 580
583/** 581/**
584 * omap_iommu_vmap - (d)-(p)-(v) address mapper 582 * omap_iommu_vmap - (d)-(p)-(v) address mapper
585 * @obj: objective iommu 583 * @domain: iommu domain
584 * @dev: client device
586 * @sgt: address of scatter gather table 585 * @sgt: address of scatter gather table
587 * @flags: iovma and page property 586 * @flags: iovma and page property
588 * 587 *
589 * Creates 1-n-1 mapping with given @sgt and returns @da. 588 * Creates 1-n-1 mapping with given @sgt and returns @da.
590 * All @sgt elements must be io page size aligned. 589 * All @sgt elements must be io page size aligned.
591 */ 590 */
592u32 omap_iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da, 591u32 omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da,
593 const struct sg_table *sgt, u32 flags) 592 const struct sg_table *sgt, u32 flags)
594{ 593{
594 struct omap_iommu *obj = dev_to_omap_iommu(dev);
595 size_t bytes; 595 size_t bytes;
596 void *va = NULL; 596 void *va = NULL;
597 597
@@ -622,15 +622,17 @@ EXPORT_SYMBOL_GPL(omap_iommu_vmap);
622 622
623/** 623/**
624 * omap_iommu_vunmap - release virtual mapping obtained by 'omap_iommu_vmap()' 624 * omap_iommu_vunmap - release virtual mapping obtained by 'omap_iommu_vmap()'
625 * @obj: objective iommu 625 * @domain: iommu domain
626 * @dev: client device
626 * @da: iommu device virtual address 627 * @da: iommu device virtual address
627 * 628 *
628 * Free the iommu virtually contiguous memory area starting at 629 * Free the iommu virtually contiguous memory area starting at
629 * @da, which was returned by 'omap_iommu_vmap()'. 630 * @da, which was returned by 'omap_iommu_vmap()'.
630 */ 631 */
631struct sg_table * 632struct sg_table *
632omap_iommu_vunmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da) 633omap_iommu_vunmap(struct iommu_domain *domain, struct device *dev, u32 da)
633{ 634{
635 struct omap_iommu *obj = dev_to_omap_iommu(dev);
634 struct sg_table *sgt; 636 struct sg_table *sgt;
635 /* 637 /*
636 * 'sgt' is allocated before 'omap_iommu_vmalloc()' is called. 638 * 'sgt' is allocated before 'omap_iommu_vmalloc()' is called.
@@ -647,7 +649,7 @@ EXPORT_SYMBOL_GPL(omap_iommu_vunmap);
647 649
648/** 650/**
649 * omap_iommu_vmalloc - (d)-(p)-(v) address allocator and mapper 651 * omap_iommu_vmalloc - (d)-(p)-(v) address allocator and mapper
650 * @obj: objective iommu 652 * @dev: client device
651 * @da: contiguous iommu virtual memory 653 * @da: contiguous iommu virtual memory
652 * @bytes: allocation size 654 * @bytes: allocation size
653 * @flags: iovma and page property 655 * @flags: iovma and page property
@@ -656,9 +658,10 @@ EXPORT_SYMBOL_GPL(omap_iommu_vunmap);
656 * @da again, which might be adjusted if 'IOVMF_DA_FIXED' is not set. 658 * @da again, which might be adjusted if 'IOVMF_DA_FIXED' is not set.
657 */ 659 */
658u32 660u32
659omap_iommu_vmalloc(struct iommu_domain *domain, struct omap_iommu *obj, u32 da, 661omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev, u32 da,
660 size_t bytes, u32 flags) 662 size_t bytes, u32 flags)
661{ 663{
664 struct omap_iommu *obj = dev_to_omap_iommu(dev);
662 void *va; 665 void *va;
663 struct sg_table *sgt; 666 struct sg_table *sgt;
664 667
@@ -698,15 +701,16 @@ EXPORT_SYMBOL_GPL(omap_iommu_vmalloc);
698 701
699/** 702/**
700 * omap_iommu_vfree - release memory allocated by 'omap_iommu_vmalloc()' 703 * omap_iommu_vfree - release memory allocated by 'omap_iommu_vmalloc()'
701 * @obj: objective iommu 704 * @dev: client device
702 * @da: iommu device virtual address 705 * @da: iommu device virtual address
703 * 706 *
704 * Frees the iommu virtually contiguous memory area starting at 707 * Frees the iommu virtually contiguous memory area starting at
705 * @da, as obtained from 'omap_iommu_vmalloc()'. 708 * @da, as obtained from 'omap_iommu_vmalloc()'.
706 */ 709 */
707void omap_iommu_vfree(struct iommu_domain *domain, struct omap_iommu *obj, 710void omap_iommu_vfree(struct iommu_domain *domain, struct device *dev,
708 const u32 da) 711 const u32 da)
709{ 712{
713 struct omap_iommu *obj = dev_to_omap_iommu(dev);
710 struct sg_table *sgt; 714 struct sg_table *sgt;
711 715
712 sgt = unmap_vm_area(domain, obj, da, vfree, 716 sgt = unmap_vm_area(domain, obj, da, vfree,
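For illustration (not part of the patch itself): a brief sketch of the reworked omap-iovmm entry points, which now take the client struct device instead of an omap_iommu handle (the handle is resolved internally via dev_to_omap_iommu()). The wrapper names and the plat/iovmm.h include are assumptions made for this example.

/* Illustrative sketch only -- not taken from the patch. */
#include <linux/err.h>
#include <plat/iovmm.h>

static u32 example_iovmm_alloc(struct iommu_domain *domain,
                               struct device *client, size_t bytes, u32 flags)
{
        /* The client device, not an omap_iommu object, identifies the MMU. */
        u32 da = omap_iommu_vmalloc(domain, client, 0, bytes, flags);

        return IS_ERR_VALUE(da) ? 0 : da;
}

static void example_iovmm_free(struct iommu_domain *domain,
                               struct device *client, u32 da)
{
        omap_iommu_vfree(domain, client, da);
}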
diff --git a/drivers/media/video/omap3isp/isp.c b/drivers/media/video/omap3isp/isp.c
index b818cacf420..d4c48ef227f 100644
--- a/drivers/media/video/omap3isp/isp.c
+++ b/drivers/media/video/omap3isp/isp.c
@@ -80,13 +80,6 @@
80#include "isph3a.h" 80#include "isph3a.h"
81#include "isphist.h" 81#include "isphist.h"
82 82
83/*
84 * this is provided as an interim solution until omap3isp doesn't need
85 * any omap-specific iommu API
86 */
87#define to_iommu(dev) \
88 (struct omap_iommu *)platform_get_drvdata(to_platform_device(dev))
89
90static unsigned int autoidle; 83static unsigned int autoidle;
91module_param(autoidle, int, 0444); 84module_param(autoidle, int, 0444);
92MODULE_PARM_DESC(autoidle, "Enable OMAP3ISP AUTOIDLE support"); 85MODULE_PARM_DESC(autoidle, "Enable OMAP3ISP AUTOIDLE support");
@@ -1114,8 +1107,7 @@ isp_restore_context(struct isp_device *isp, struct isp_reg *reg_list)
1114static void isp_save_ctx(struct isp_device *isp) 1107static void isp_save_ctx(struct isp_device *isp)
1115{ 1108{
1116 isp_save_context(isp, isp_reg_list); 1109 isp_save_context(isp, isp_reg_list);
1117 if (isp->iommu) 1110 omap_iommu_save_ctx(isp->dev);
1118 omap_iommu_save_ctx(isp->iommu);
1119} 1111}
1120 1112
1121/* 1113/*
@@ -1128,8 +1120,7 @@ static void isp_save_ctx(struct isp_device *isp)
1128static void isp_restore_ctx(struct isp_device *isp) 1120static void isp_restore_ctx(struct isp_device *isp)
1129{ 1121{
1130 isp_restore_context(isp, isp_reg_list); 1122 isp_restore_context(isp, isp_reg_list);
1131 if (isp->iommu) 1123 omap_iommu_restore_ctx(isp->dev);
1132 omap_iommu_restore_ctx(isp->iommu);
1133 omap3isp_ccdc_restore_context(isp); 1124 omap3isp_ccdc_restore_context(isp);
1134 omap3isp_preview_restore_context(isp); 1125 omap3isp_preview_restore_context(isp);
1135} 1126}
@@ -1983,7 +1974,7 @@ static int isp_remove(struct platform_device *pdev)
1983 isp_cleanup_modules(isp); 1974 isp_cleanup_modules(isp);
1984 1975
1985 omap3isp_get(isp); 1976 omap3isp_get(isp);
1986 iommu_detach_device(isp->domain, isp->iommu_dev); 1977 iommu_detach_device(isp->domain, &pdev->dev);
1987 iommu_domain_free(isp->domain); 1978 iommu_domain_free(isp->domain);
1988 omap3isp_put(isp); 1979 omap3isp_put(isp);
1989 1980
@@ -2131,17 +2122,6 @@ static int isp_probe(struct platform_device *pdev)
2131 } 2122 }
2132 } 2123 }
2133 2124
2134 /* IOMMU */
2135 isp->iommu_dev = omap_find_iommu_device("isp");
2136 if (!isp->iommu_dev) {
2137 dev_err(isp->dev, "omap_find_iommu_device failed\n");
2138 ret = -ENODEV;
2139 goto error_isp;
2140 }
2141
2142 /* to be removed once iommu migration is complete */
2143 isp->iommu = to_iommu(isp->iommu_dev);
2144
2145 isp->domain = iommu_domain_alloc(pdev->dev.bus); 2125 isp->domain = iommu_domain_alloc(pdev->dev.bus);
2146 if (!isp->domain) { 2126 if (!isp->domain) {
2147 dev_err(isp->dev, "can't alloc iommu domain\n"); 2127 dev_err(isp->dev, "can't alloc iommu domain\n");
@@ -2149,7 +2129,7 @@ static int isp_probe(struct platform_device *pdev)
2149 goto error_isp; 2129 goto error_isp;
2150 } 2130 }
2151 2131
2152 ret = iommu_attach_device(isp->domain, isp->iommu_dev); 2132 ret = iommu_attach_device(isp->domain, &pdev->dev);
2153 if (ret) { 2133 if (ret) {
2154 dev_err(&pdev->dev, "can't attach iommu device: %d\n", ret); 2134 dev_err(&pdev->dev, "can't attach iommu device: %d\n", ret);
2155 goto free_domain; 2135 goto free_domain;
@@ -2188,7 +2168,7 @@ error_modules:
2188error_irq: 2168error_irq:
2189 free_irq(isp->irq_num, isp); 2169 free_irq(isp->irq_num, isp);
2190detach_dev: 2170detach_dev:
2191 iommu_detach_device(isp->domain, isp->iommu_dev); 2171 iommu_detach_device(isp->domain, &pdev->dev);
2192free_domain: 2172free_domain:
2193 iommu_domain_free(isp->domain); 2173 iommu_domain_free(isp->domain);
2194error_isp: 2174error_isp:
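For illustration (not part of the patch itself): a condensed sketch of the probe-time attach sequence once omap3isp uses the generic IOMMU API directly, passing its own platform device instead of a looked-up IOMMU device. Error handling is trimmed and names are placeholders.

/* Illustrative sketch only -- not taken from the patch. */
#include <linux/iommu.h>
#include <linux/platform_device.h>

static struct iommu_domain *example_isp_attach(struct platform_device *pdev)
{
        struct iommu_domain *domain = iommu_domain_alloc(pdev->dev.bus);

        if (!domain)
                return NULL;

        /* The OMAP IOMMU driver resolves the MMU from dev->archdata.iommu,
         * so the client only hands over its own device. */
        if (iommu_attach_device(domain, &pdev->dev)) {
                iommu_domain_free(domain);
                return NULL;
        }
        return domain;
}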
diff --git a/drivers/media/video/omap3isp/isp.h b/drivers/media/video/omap3isp/isp.h
index 705946ef4d6..d96603eb0d1 100644
--- a/drivers/media/video/omap3isp/isp.h
+++ b/drivers/media/video/omap3isp/isp.h
@@ -212,9 +212,7 @@ struct isp_device {
212 unsigned int sbl_resources; 212 unsigned int sbl_resources;
213 unsigned int subclk_resources; 213 unsigned int subclk_resources;
214 214
215 struct omap_iommu *iommu;
216 struct iommu_domain *domain; 215 struct iommu_domain *domain;
217 struct device *iommu_dev;
218 216
219 struct isp_platform_callback platform_cb; 217 struct isp_platform_callback platform_cb;
220}; 218};
diff --git a/drivers/media/video/omap3isp/ispccdc.c b/drivers/media/video/omap3isp/ispccdc.c
index 54a4a3f22e2..d341ba12593 100644
--- a/drivers/media/video/omap3isp/ispccdc.c
+++ b/drivers/media/video/omap3isp/ispccdc.c
@@ -366,7 +366,7 @@ static void ccdc_lsc_free_request(struct isp_ccdc_device *ccdc,
366 dma_unmap_sg(isp->dev, req->iovm->sgt->sgl, 366 dma_unmap_sg(isp->dev, req->iovm->sgt->sgl,
367 req->iovm->sgt->nents, DMA_TO_DEVICE); 367 req->iovm->sgt->nents, DMA_TO_DEVICE);
368 if (req->table) 368 if (req->table)
369 omap_iommu_vfree(isp->domain, isp->iommu, req->table); 369 omap_iommu_vfree(isp->domain, isp->dev, req->table);
370 kfree(req); 370 kfree(req);
371} 371}
372 372
@@ -438,7 +438,7 @@ static int ccdc_lsc_config(struct isp_ccdc_device *ccdc,
438 438
439 req->enable = 1; 439 req->enable = 1;
440 440
441 req->table = omap_iommu_vmalloc(isp->domain, isp->iommu, 0, 441 req->table = omap_iommu_vmalloc(isp->domain, isp->dev, 0,
442 req->config.size, IOMMU_FLAG); 442 req->config.size, IOMMU_FLAG);
443 if (IS_ERR_VALUE(req->table)) { 443 if (IS_ERR_VALUE(req->table)) {
444 req->table = 0; 444 req->table = 0;
@@ -446,7 +446,7 @@ static int ccdc_lsc_config(struct isp_ccdc_device *ccdc,
446 goto done; 446 goto done;
447 } 447 }
448 448
449 req->iovm = omap_find_iovm_area(isp->iommu, req->table); 449 req->iovm = omap_find_iovm_area(isp->dev, req->table);
450 if (req->iovm == NULL) { 450 if (req->iovm == NULL) {
451 ret = -ENOMEM; 451 ret = -ENOMEM;
452 goto done; 452 goto done;
@@ -462,7 +462,7 @@ static int ccdc_lsc_config(struct isp_ccdc_device *ccdc,
462 dma_sync_sg_for_cpu(isp->dev, req->iovm->sgt->sgl, 462 dma_sync_sg_for_cpu(isp->dev, req->iovm->sgt->sgl,
463 req->iovm->sgt->nents, DMA_TO_DEVICE); 463 req->iovm->sgt->nents, DMA_TO_DEVICE);
464 464
465 table = omap_da_to_va(isp->iommu, req->table); 465 table = omap_da_to_va(isp->dev, req->table);
466 if (copy_from_user(table, config->lsc, req->config.size)) { 466 if (copy_from_user(table, config->lsc, req->config.size)) {
467 ret = -EFAULT; 467 ret = -EFAULT;
468 goto done; 468 goto done;
@@ -734,15 +734,15 @@ static int ccdc_config(struct isp_ccdc_device *ccdc,
734 * already done by omap_iommu_vmalloc(). 734 * already done by omap_iommu_vmalloc().
735 */ 735 */
736 size = ccdc->fpc.fpnum * 4; 736 size = ccdc->fpc.fpnum * 4;
737 table_new = omap_iommu_vmalloc(isp->domain, isp->iommu, 737 table_new = omap_iommu_vmalloc(isp->domain, isp->dev,
738 0, size, IOMMU_FLAG); 738 0, size, IOMMU_FLAG);
739 if (IS_ERR_VALUE(table_new)) 739 if (IS_ERR_VALUE(table_new))
740 return -ENOMEM; 740 return -ENOMEM;
741 741
742 if (copy_from_user(omap_da_to_va(isp->iommu, table_new), 742 if (copy_from_user(omap_da_to_va(isp->dev, table_new),
743 (__force void __user *) 743 (__force void __user *)
744 ccdc->fpc.fpcaddr, size)) { 744 ccdc->fpc.fpcaddr, size)) {
745 omap_iommu_vfree(isp->domain, isp->iommu, 745 omap_iommu_vfree(isp->domain, isp->dev,
746 table_new); 746 table_new);
747 return -EFAULT; 747 return -EFAULT;
748 } 748 }
@@ -753,7 +753,7 @@ static int ccdc_config(struct isp_ccdc_device *ccdc,
753 753
754 ccdc_configure_fpc(ccdc); 754 ccdc_configure_fpc(ccdc);
755 if (table_old != 0) 755 if (table_old != 0)
756 omap_iommu_vfree(isp->domain, isp->iommu, table_old); 756 omap_iommu_vfree(isp->domain, isp->dev, table_old);
757 } 757 }
758 758
759 return ccdc_lsc_config(ccdc, ccdc_struct); 759 return ccdc_lsc_config(ccdc, ccdc_struct);
@@ -2309,7 +2309,7 @@ void omap3isp_ccdc_cleanup(struct isp_device *isp)
2309 ccdc_lsc_free_queue(ccdc, &ccdc->lsc.free_queue); 2309 ccdc_lsc_free_queue(ccdc, &ccdc->lsc.free_queue);
2310 2310
2311 if (ccdc->fpc.fpcaddr != 0) 2311 if (ccdc->fpc.fpcaddr != 0)
2312 omap_iommu_vfree(isp->domain, isp->iommu, ccdc->fpc.fpcaddr); 2312 omap_iommu_vfree(isp->domain, isp->dev, ccdc->fpc.fpcaddr);
2313 2313
2314 mutex_destroy(&ccdc->ioctl_lock); 2314 mutex_destroy(&ccdc->ioctl_lock);
2315} 2315}
diff --git a/drivers/media/video/omap3isp/ispstat.c b/drivers/media/video/omap3isp/ispstat.c
index bc0b2c7349b..11871ecc6d2 100644
--- a/drivers/media/video/omap3isp/ispstat.c
+++ b/drivers/media/video/omap3isp/ispstat.c
@@ -366,7 +366,7 @@ static void isp_stat_bufs_free(struct ispstat *stat)
366 dma_unmap_sg(isp->dev, buf->iovm->sgt->sgl, 366 dma_unmap_sg(isp->dev, buf->iovm->sgt->sgl,
367 buf->iovm->sgt->nents, 367 buf->iovm->sgt->nents,
368 DMA_FROM_DEVICE); 368 DMA_FROM_DEVICE);
369 omap_iommu_vfree(isp->domain, isp->iommu, 369 omap_iommu_vfree(isp->domain, isp->dev,
370 buf->iommu_addr); 370 buf->iommu_addr);
371 } else { 371 } else {
372 if (!buf->virt_addr) 372 if (!buf->virt_addr)
@@ -400,7 +400,7 @@ static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size)
400 struct iovm_struct *iovm; 400 struct iovm_struct *iovm;
401 401
402 WARN_ON(buf->dma_addr); 402 WARN_ON(buf->dma_addr);
403 buf->iommu_addr = omap_iommu_vmalloc(isp->domain, isp->iommu, 0, 403 buf->iommu_addr = omap_iommu_vmalloc(isp->domain, isp->dev, 0,
404 size, IOMMU_FLAG); 404 size, IOMMU_FLAG);
405 if (IS_ERR((void *)buf->iommu_addr)) { 405 if (IS_ERR((void *)buf->iommu_addr)) {
406 dev_err(stat->isp->dev, 406 dev_err(stat->isp->dev,
@@ -410,7 +410,7 @@ static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size)
410 return -ENOMEM; 410 return -ENOMEM;
411 } 411 }
412 412
413 iovm = omap_find_iovm_area(isp->iommu, buf->iommu_addr); 413 iovm = omap_find_iovm_area(isp->dev, buf->iommu_addr);
414 if (!iovm || 414 if (!iovm ||
415 !dma_map_sg(isp->dev, iovm->sgt->sgl, iovm->sgt->nents, 415 !dma_map_sg(isp->dev, iovm->sgt->sgl, iovm->sgt->nents,
416 DMA_FROM_DEVICE)) { 416 DMA_FROM_DEVICE)) {
@@ -419,7 +419,7 @@ static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size)
419 } 419 }
420 buf->iovm = iovm; 420 buf->iovm = iovm;
421 421
422 buf->virt_addr = omap_da_to_va(stat->isp->iommu, 422 buf->virt_addr = omap_da_to_va(stat->isp->dev,
423 (u32)buf->iommu_addr); 423 (u32)buf->iommu_addr);
424 buf->empty = 1; 424 buf->empty = 1;
425 dev_dbg(stat->isp->dev, "%s: buffer[%d] allocated." 425 dev_dbg(stat->isp->dev, "%s: buffer[%d] allocated."
diff --git a/drivers/media/video/omap3isp/ispvideo.c b/drivers/media/video/omap3isp/ispvideo.c
index f2290578448..bd3aebafafa 100644
--- a/drivers/media/video/omap3isp/ispvideo.c
+++ b/drivers/media/video/omap3isp/ispvideo.c
@@ -453,7 +453,7 @@ ispmmu_vmap(struct isp_device *isp, const struct scatterlist *sglist, int sglen)
453 sgt->nents = sglen; 453 sgt->nents = sglen;
454 sgt->orig_nents = sglen; 454 sgt->orig_nents = sglen;
455 455
456 da = omap_iommu_vmap(isp->domain, isp->iommu, 0, sgt, IOMMU_FLAG); 456 da = omap_iommu_vmap(isp->domain, isp->dev, 0, sgt, IOMMU_FLAG);
457 if (IS_ERR_VALUE(da)) 457 if (IS_ERR_VALUE(da))
458 kfree(sgt); 458 kfree(sgt);
459 459
@@ -469,7 +469,7 @@ static void ispmmu_vunmap(struct isp_device *isp, dma_addr_t da)
469{ 469{
470 struct sg_table *sgt; 470 struct sg_table *sgt;
471 471
472 sgt = omap_iommu_vunmap(isp->domain, isp->iommu, (u32)da); 472 sgt = omap_iommu_vunmap(isp->domain, isp->dev, (u32)da);
473 kfree(sgt); 473 kfree(sgt);
474} 474}
475 475
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index b0dd08e6a9d..9dd90b30f91 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -175,21 +175,22 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
175 u32 max_requests; 175 u32 max_requests;
176 int pos; 176 int pos;
177 177
178 pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); 178 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
179 if (!pos) 179 if (!pos)
180 return -EINVAL; 180 return -EINVAL;
181 181
182 pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); 182 pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
183 pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF, &status); 183 pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
184 if ((control & PCI_PRI_ENABLE) || !(status & PCI_PRI_STATUS_STOPPED)) 184 if ((control & PCI_PRI_CTRL_ENABLE) ||
185 !(status & PCI_PRI_STATUS_STOPPED))
185 return -EBUSY; 186 return -EBUSY;
186 187
187 pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ_OFF, &max_requests); 188 pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests);
188 reqs = min(max_requests, reqs); 189 reqs = min(max_requests, reqs);
189 pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ_OFF, reqs); 190 pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs);
190 191
191 control |= PCI_PRI_ENABLE; 192 control |= PCI_PRI_CTRL_ENABLE;
192 pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control); 193 pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
193 194
194 return 0; 195 return 0;
195} 196}
@@ -206,13 +207,13 @@ void pci_disable_pri(struct pci_dev *pdev)
206 u16 control; 207 u16 control;
207 int pos; 208 int pos;
208 209
209 pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); 210 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
210 if (!pos) 211 if (!pos)
211 return; 212 return;
212 213
213 pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); 214 pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
214 control &= ~PCI_PRI_ENABLE; 215 control &= ~PCI_PRI_CTRL_ENABLE;
215 pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control); 216 pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
216} 217}
217EXPORT_SYMBOL_GPL(pci_disable_pri); 218EXPORT_SYMBOL_GPL(pci_disable_pri);
218 219
@@ -227,13 +228,13 @@ bool pci_pri_enabled(struct pci_dev *pdev)
227 u16 control; 228 u16 control;
228 int pos; 229 int pos;
229 230
230 pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); 231 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
231 if (!pos) 232 if (!pos)
232 return false; 233 return false;
233 234
234 pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); 235 pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
235 236
236 return (control & PCI_PRI_ENABLE) ? true : false; 237 return (control & PCI_PRI_CTRL_ENABLE) ? true : false;
237} 238}
238EXPORT_SYMBOL_GPL(pci_pri_enabled); 239EXPORT_SYMBOL_GPL(pci_pri_enabled);
239 240
@@ -249,17 +250,17 @@ int pci_reset_pri(struct pci_dev *pdev)
249 u16 control; 250 u16 control;
250 int pos; 251 int pos;
251 252
252 pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); 253 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
253 if (!pos) 254 if (!pos)
254 return -EINVAL; 255 return -EINVAL;
255 256
256 pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); 257 pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
257 if (control & PCI_PRI_ENABLE) 258 if (control & PCI_PRI_CTRL_ENABLE)
258 return -EBUSY; 259 return -EBUSY;
259 260
260 control |= PCI_PRI_RESET; 261 control |= PCI_PRI_CTRL_RESET;
261 262
262 pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control); 263 pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
263 264
264 return 0; 265 return 0;
265} 266}
@@ -282,14 +283,14 @@ bool pci_pri_stopped(struct pci_dev *pdev)
282 u16 control, status; 283 u16 control, status;
283 int pos; 284 int pos;
284 285
285 pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); 286 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
286 if (!pos) 287 if (!pos)
287 return true; 288 return true;
288 289
289 pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); 290 pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
290 pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF, &status); 291 pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
291 292
292 if (control & PCI_PRI_ENABLE) 293 if (control & PCI_PRI_CTRL_ENABLE)
293 return false; 294 return false;
294 295
295 return (status & PCI_PRI_STATUS_STOPPED) ? true : false; 296 return (status & PCI_PRI_STATUS_STOPPED) ? true : false;
@@ -311,15 +312,15 @@ int pci_pri_status(struct pci_dev *pdev)
311 u16 status, control; 312 u16 status, control;
312 int pos; 313 int pos;
313 314
314 pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); 315 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
315 if (!pos) 316 if (!pos)
316 return -EINVAL; 317 return -EINVAL;
317 318
318 pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); 319 pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
319 pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF, &status); 320 pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
320 321
321 /* Stopped bit is undefined when enable == 1, so clear it */ 322 /* Stopped bit is undefined when enable == 1, so clear it */
322 if (control & PCI_PRI_ENABLE) 323 if (control & PCI_PRI_CTRL_ENABLE)
323 status &= ~PCI_PRI_STATUS_STOPPED; 324 status &= ~PCI_PRI_STATUS_STOPPED;
324 325
325 return status; 326 return status;
@@ -342,25 +343,25 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
342 u16 control, supported; 343 u16 control, supported;
343 int pos; 344 int pos;
344 345
345 pos = pci_find_ext_capability(pdev, PCI_PASID_CAP); 346 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
346 if (!pos) 347 if (!pos)
347 return -EINVAL; 348 return -EINVAL;
348 349
349 pci_read_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, &control); 350 pci_read_config_word(pdev, pos + PCI_PASID_CTRL, &control);
350 pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported); 351 pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
351 352
352 if (!(supported & PCI_PASID_ENABLE)) 353 if (control & PCI_PASID_CTRL_ENABLE)
353 return -EINVAL; 354 return -EINVAL;
354 355
355 supported &= PCI_PASID_EXEC | PCI_PASID_PRIV; 356 supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
356 357
357 /* User wants to enable anything unsupported? */ 358 /* User wants to enable anything unsupported? */
358 if ((supported & features) != features) 359 if ((supported & features) != features)
359 return -EINVAL; 360 return -EINVAL;
360 361
361 control = PCI_PASID_ENABLE | features; 362 control = PCI_PASID_CTRL_ENABLE | features;
362 363
363 pci_write_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, control); 364 pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
364 365
365 return 0; 366 return 0;
366} 367}
@@ -376,11 +377,11 @@ void pci_disable_pasid(struct pci_dev *pdev)
376 u16 control = 0; 377 u16 control = 0;
377 int pos; 378 int pos;
378 379
379 pos = pci_find_ext_capability(pdev, PCI_PASID_CAP); 380 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
380 if (!pos) 381 if (!pos)
381 return; 382 return;
382 383
383 pci_write_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, control); 384 pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
384} 385}
385EXPORT_SYMBOL_GPL(pci_disable_pasid); 386EXPORT_SYMBOL_GPL(pci_disable_pasid);
386 387
@@ -391,22 +392,21 @@ EXPORT_SYMBOL_GPL(pci_disable_pasid);
391 * Returns a negative value when no PASID capability is present. 392 * Returns a negative value when no PASID capability is present.
392 * Otherwise it returns a bitmask with supported features. Current 393 * Otherwise it returns a bitmask with supported features. Current
393 * features reported are: 394 * features reported are:
394 * PCI_PASID_ENABLE - PASID capability can be enabled 395 * PCI_PASID_CAP_EXEC - Execute permission supported
395 * PCI_PASID_EXEC - Execute permission supported 396 * PCI_PASID_CAP_PRIV - Privileged mode supported
396 * PCI_PASID_PRIV - Privileged mode supported
397 */ 397 */
398int pci_pasid_features(struct pci_dev *pdev) 398int pci_pasid_features(struct pci_dev *pdev)
399{ 399{
400 u16 supported; 400 u16 supported;
401 int pos; 401 int pos;
402 402
403 pos = pci_find_ext_capability(pdev, PCI_PASID_CAP); 403 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
404 if (!pos) 404 if (!pos)
405 return -EINVAL; 405 return -EINVAL;
406 406
407 pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported); 407 pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
408 408
409 supported &= PCI_PASID_ENABLE | PCI_PASID_EXEC | PCI_PASID_PRIV; 409 supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
410 410
411 return supported; 411 return supported;
412} 412}
@@ -426,11 +426,11 @@ int pci_max_pasids(struct pci_dev *pdev)
426 u16 supported; 426 u16 supported;
427 int pos; 427 int pos;
428 428
429 pos = pci_find_ext_capability(pdev, PCI_PASID_CAP); 429 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
430 if (!pos) 430 if (!pos)
431 return -EINVAL; 431 return -EINVAL;
432 432
433 pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported); 433 pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
434 434
435 supported = (supported & PASID_NUMBER_MASK) >> PASID_NUMBER_SHIFT; 435 supported = (supported & PASID_NUMBER_MASK) >> PASID_NUMBER_SHIFT;
436 436
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 838f571027b..9a33fdde2d1 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -45,7 +45,6 @@ extern int pciehp_poll_time;
45extern int pciehp_debug; 45extern int pciehp_debug;
46extern int pciehp_force; 46extern int pciehp_force;
47extern struct workqueue_struct *pciehp_wq; 47extern struct workqueue_struct *pciehp_wq;
48extern struct workqueue_struct *pciehp_ordered_wq;
49 48
50#define dbg(format, arg...) \ 49#define dbg(format, arg...) \
51do { \ 50do { \
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 7ac8358df8f..b8c99d35ac9 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -43,7 +43,6 @@ int pciehp_poll_mode;
43int pciehp_poll_time; 43int pciehp_poll_time;
44int pciehp_force; 44int pciehp_force;
45struct workqueue_struct *pciehp_wq; 45struct workqueue_struct *pciehp_wq;
46struct workqueue_struct *pciehp_ordered_wq;
47 46
48#define DRIVER_VERSION "0.4" 47#define DRIVER_VERSION "0.4"
49#define DRIVER_AUTHOR "Dan Zink <dan.zink@compaq.com>, Greg Kroah-Hartman <greg@kroah.com>, Dely Sy <dely.l.sy@intel.com>" 48#define DRIVER_AUTHOR "Dan Zink <dan.zink@compaq.com>, Greg Kroah-Hartman <greg@kroah.com>, Dely Sy <dely.l.sy@intel.com>"
@@ -345,18 +344,11 @@ static int __init pcied_init(void)
345 if (!pciehp_wq) 344 if (!pciehp_wq)
346 return -ENOMEM; 345 return -ENOMEM;
347 346
348 pciehp_ordered_wq = alloc_ordered_workqueue("pciehp_ordered", 0);
349 if (!pciehp_ordered_wq) {
350 destroy_workqueue(pciehp_wq);
351 return -ENOMEM;
352 }
353
354 pciehp_firmware_init(); 347 pciehp_firmware_init();
355 retval = pcie_port_service_register(&hpdriver_portdrv); 348 retval = pcie_port_service_register(&hpdriver_portdrv);
356 dbg("pcie_port_service_register = %d\n", retval); 349 dbg("pcie_port_service_register = %d\n", retval);
357 info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); 350 info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
358 if (retval) { 351 if (retval) {
359 destroy_workqueue(pciehp_ordered_wq);
360 destroy_workqueue(pciehp_wq); 352 destroy_workqueue(pciehp_wq);
361 dbg("Failure to register service\n"); 353 dbg("Failure to register service\n");
362 } 354 }
@@ -366,9 +358,8 @@ static int __init pcied_init(void)
366static void __exit pcied_cleanup(void) 358static void __exit pcied_cleanup(void)
367{ 359{
368 dbg("unload_pciehpd()\n"); 360 dbg("unload_pciehpd()\n");
369 destroy_workqueue(pciehp_ordered_wq);
370 destroy_workqueue(pciehp_wq);
371 pcie_port_service_unregister(&hpdriver_portdrv); 361 pcie_port_service_unregister(&hpdriver_portdrv);
362 destroy_workqueue(pciehp_wq);
372 info(DRIVER_DESC " version: " DRIVER_VERSION " unloaded\n"); 363 info(DRIVER_DESC " version: " DRIVER_VERSION " unloaded\n");
373} 364}
374 365
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index 085dbb5fc16..27f44295a65 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -344,7 +344,7 @@ void pciehp_queue_pushbutton_work(struct work_struct *work)
344 kfree(info); 344 kfree(info);
345 goto out; 345 goto out;
346 } 346 }
347 queue_work(pciehp_ordered_wq, &info->work); 347 queue_work(pciehp_wq, &info->work);
348 out: 348 out:
349 mutex_unlock(&p_slot->lock); 349 mutex_unlock(&p_slot->lock);
350} 350}
@@ -439,7 +439,7 @@ static void handle_surprise_event(struct slot *p_slot)
439 else 439 else
440 p_slot->state = POWERON_STATE; 440 p_slot->state = POWERON_STATE;
441 441
442 queue_work(pciehp_ordered_wq, &info->work); 442 queue_work(pciehp_wq, &info->work);
443} 443}
444 444
445static void interrupt_event_handler(struct work_struct *work) 445static void interrupt_event_handler(struct work_struct *work)
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 7b1414810ae..bcdbb164362 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -806,7 +806,6 @@ static void pcie_cleanup_slot(struct controller *ctrl)
806 struct slot *slot = ctrl->slot; 806 struct slot *slot = ctrl->slot;
807 cancel_delayed_work(&slot->work); 807 cancel_delayed_work(&slot->work);
808 flush_workqueue(pciehp_wq); 808 flush_workqueue(pciehp_wq);
809 flush_workqueue(pciehp_ordered_wq);
810 kfree(slot); 809 kfree(slot);
811} 810}
812 811
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 0e6d04d7ba4..337e16ab4a9 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -323,6 +323,8 @@ static void free_msi_irqs(struct pci_dev *dev)
323 if (list_is_last(&entry->list, &dev->msi_list)) 323 if (list_is_last(&entry->list, &dev->msi_list))
324 iounmap(entry->mask_base); 324 iounmap(entry->mask_base);
325 } 325 }
326 kobject_del(&entry->kobj);
327 kobject_put(&entry->kobj);
326 list_del(&entry->list); 328 list_del(&entry->list);
327 kfree(entry); 329 kfree(entry);
328 } 330 }
@@ -403,6 +405,98 @@ void pci_restore_msi_state(struct pci_dev *dev)
403} 405}
404EXPORT_SYMBOL_GPL(pci_restore_msi_state); 406EXPORT_SYMBOL_GPL(pci_restore_msi_state);
405 407
408
409#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
410#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)
411
412struct msi_attribute {
413 struct attribute attr;
414 ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
415 char *buf);
416 ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
417 const char *buf, size_t count);
418};
419
420static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
421 char *buf)
422{
423 return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
424}
425
426static ssize_t msi_irq_attr_show(struct kobject *kobj,
427 struct attribute *attr, char *buf)
428{
429 struct msi_attribute *attribute = to_msi_attr(attr);
430 struct msi_desc *entry = to_msi_desc(kobj);
431
432 if (!attribute->show)
433 return -EIO;
434
435 return attribute->show(entry, attribute, buf);
436}
437
438static const struct sysfs_ops msi_irq_sysfs_ops = {
439 .show = msi_irq_attr_show,
440};
441
442static struct msi_attribute mode_attribute =
443 __ATTR(mode, S_IRUGO, show_msi_mode, NULL);
444
445
446struct attribute *msi_irq_default_attrs[] = {
447 &mode_attribute.attr,
448 NULL
449};
450
451void msi_kobj_release(struct kobject *kobj)
452{
453 struct msi_desc *entry = to_msi_desc(kobj);
454
455 pci_dev_put(entry->dev);
456}
457
458static struct kobj_type msi_irq_ktype = {
459 .release = msi_kobj_release,
460 .sysfs_ops = &msi_irq_sysfs_ops,
461 .default_attrs = msi_irq_default_attrs,
462};
463
464static int populate_msi_sysfs(struct pci_dev *pdev)
465{
466 struct msi_desc *entry;
467 struct kobject *kobj;
468 int ret;
469 int count = 0;
470
471 pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
472 if (!pdev->msi_kset)
473 return -ENOMEM;
474
475 list_for_each_entry(entry, &pdev->msi_list, list) {
476 kobj = &entry->kobj;
477 kobj->kset = pdev->msi_kset;
478 pci_dev_get(pdev);
479 ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
480 "%u", entry->irq);
481 if (ret)
482 goto out_unroll;
483
484 count++;
485 }
486
487 return 0;
488
489out_unroll:
490 list_for_each_entry(entry, &pdev->msi_list, list) {
491 if (!count)
492 break;
493 kobject_del(&entry->kobj);
494 kobject_put(&entry->kobj);
495 count--;
496 }
497 return ret;
498}
499
406/** 500/**
407 * msi_capability_init - configure device's MSI capability structure 501 * msi_capability_init - configure device's MSI capability structure
408 * @dev: pointer to the pci_dev data structure of MSI device function 502 * @dev: pointer to the pci_dev data structure of MSI device function
@@ -454,6 +548,13 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
454 return ret; 548 return ret;
455 } 549 }
456 550
551 ret = populate_msi_sysfs(dev);
552 if (ret) {
553 msi_mask_irq(entry, mask, ~mask);
554 free_msi_irqs(dev);
555 return ret;
556 }
557
457 /* Set MSI enabled bits */ 558 /* Set MSI enabled bits */
458 pci_intx_for_msi(dev, 0); 559 pci_intx_for_msi(dev, 0);
459 msi_set_enable(dev, pos, 1); 560 msi_set_enable(dev, pos, 1);
@@ -574,6 +675,12 @@ static int msix_capability_init(struct pci_dev *dev,
574 675
575 msix_program_entries(dev, entries); 676 msix_program_entries(dev, entries);
576 677
678 ret = populate_msi_sysfs(dev);
679 if (ret) {
680 ret = 0;
681 goto error;
682 }
683
577 /* Set MSI-X enabled bits and unmask the function */ 684 /* Set MSI-X enabled bits and unmask the function */
578 pci_intx_for_msi(dev, 0); 685 pci_intx_for_msi(dev, 0);
579 dev->msix_enabled = 1; 686 dev->msix_enabled = 1;
@@ -732,6 +839,8 @@ void pci_disable_msi(struct pci_dev *dev)
732 839
733 pci_msi_shutdown(dev); 840 pci_msi_shutdown(dev);
734 free_msi_irqs(dev); 841 free_msi_irqs(dev);
842 kset_unregister(dev->msi_kset);
843 dev->msi_kset = NULL;
735} 844}
736EXPORT_SYMBOL(pci_disable_msi); 845EXPORT_SYMBOL(pci_disable_msi);
737 846
@@ -830,6 +939,8 @@ void pci_disable_msix(struct pci_dev *dev)
830 939
831 pci_msix_shutdown(dev); 940 pci_msix_shutdown(dev);
832 free_msi_irqs(dev); 941 free_msi_irqs(dev);
942 kset_unregister(dev->msi_kset);
943 dev->msi_kset = NULL;
833} 944}
834EXPORT_SYMBOL(pci_disable_msix); 945EXPORT_SYMBOL(pci_disable_msix);
835 946
@@ -870,5 +981,15 @@ EXPORT_SYMBOL(pci_msi_enabled);
870 981
871void pci_msi_init_pci_dev(struct pci_dev *dev) 982void pci_msi_init_pci_dev(struct pci_dev *dev)
872{ 983{
984 int pos;
873 INIT_LIST_HEAD(&dev->msi_list); 985 INIT_LIST_HEAD(&dev->msi_list);
986
987 /* Disable the msi hardware to avoid screaming interrupts
988 * during boot. This is the power on reset default so
989 * usually this should be a noop.
990 */
991 pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
992 if (pos)
993 msi_set_enable(dev, pos, 0);
994 msix_set_enable(dev, 0);
874} 995}
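For illustration (not part of the patch itself): once a driver enables MSI or MSI-X, the populate_msi_sysfs() path added above exposes each vector under the device's sysfs directory, e.g. /sys/bus/pci/devices/<bdf>/msi_irqs/<irq>/mode, which reads "msi" or "msix". A trivial, hypothetical enable path:

/* Illustrative sketch only -- not taken from the patch. */
#include <linux/pci.h>

static int example_enable_msi(struct pci_dev *pdev)
{
        int ret = pci_enable_msi(pdev);

        if (ret)
                return ret;

        /* pdev->irq is now the MSI vector; its kobject lives in the
         * "msi_irqs" kset created by populate_msi_sysfs(). */
        return 0;
}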
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 4ecb6408b0d..060fd22a110 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -45,16 +45,20 @@ static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context)
45{ 45{
46 struct pci_dev *pci_dev = context; 46 struct pci_dev *pci_dev = context;
47 47
48 if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_dev) { 48 if (event != ACPI_NOTIFY_DEVICE_WAKE || !pci_dev)
49 return;
50
51 if (!pci_dev->pm_cap || !pci_dev->pme_support
52 || pci_check_pme_status(pci_dev)) {
49 if (pci_dev->pme_poll) 53 if (pci_dev->pme_poll)
50 pci_dev->pme_poll = false; 54 pci_dev->pme_poll = false;
51 55
52 pci_wakeup_event(pci_dev); 56 pci_wakeup_event(pci_dev);
53 pci_check_pme_status(pci_dev);
54 pm_runtime_resume(&pci_dev->dev); 57 pm_runtime_resume(&pci_dev->dev);
55 if (pci_dev->subordinate)
56 pci_pme_wakeup_bus(pci_dev->subordinate);
57 } 58 }
59
60 if (pci_dev->subordinate)
61 pci_pme_wakeup_bus(pci_dev->subordinate);
58} 62}
59 63
60/** 64/**
@@ -395,7 +399,6 @@ static int __init acpi_pci_init(void)
395 399
396 if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) { 400 if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
397 printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n"); 401 printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
398 pcie_clear_aspm();
399 pcie_no_aspm(); 402 pcie_no_aspm();
400 } 403 }
401 404
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index cbfbab18be9..1cfbf228fbb 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -68,7 +68,7 @@ struct pcie_link_state {
68 struct aspm_latency acceptable[8]; 68 struct aspm_latency acceptable[8];
69}; 69};
70 70
71static int aspm_disabled, aspm_force, aspm_clear_state; 71static int aspm_disabled, aspm_force;
72static bool aspm_support_enabled = true; 72static bool aspm_support_enabled = true;
73static DEFINE_MUTEX(aspm_lock); 73static DEFINE_MUTEX(aspm_lock);
74static LIST_HEAD(link_list); 74static LIST_HEAD(link_list);
@@ -500,9 +500,6 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
500 int pos; 500 int pos;
501 u32 reg32; 501 u32 reg32;
502 502
503 if (aspm_clear_state)
504 return -EINVAL;
505
506 /* 503 /*
507 * Some functions in a slot might not all be PCIe functions, 504 * Some functions in a slot might not all be PCIe functions,
508 * very strange. Disable ASPM for the whole slot 505 * very strange. Disable ASPM for the whole slot
@@ -574,9 +571,6 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
574 pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM) 571 pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
575 return; 572 return;
576 573
577 if (aspm_disabled && !aspm_clear_state)
578 return;
579
580 /* VIA has a strange chipset, root port is under a bridge */ 574 /* VIA has a strange chipset, root port is under a bridge */
581 if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT && 575 if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT &&
582 pdev->bus->self) 576 pdev->bus->self)
@@ -608,7 +602,7 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
608 * the BIOS's expectation, we'll do so once pci_enable_device() is 602 * the BIOS's expectation, we'll do so once pci_enable_device() is
609 * called. 603 * called.
610 */ 604 */
611 if (aspm_policy != POLICY_POWERSAVE || aspm_clear_state) { 605 if (aspm_policy != POLICY_POWERSAVE) {
612 pcie_config_aspm_path(link); 606 pcie_config_aspm_path(link);
613 pcie_set_clkpm(link, policy_to_clkpm_state(link)); 607 pcie_set_clkpm(link, policy_to_clkpm_state(link));
614 } 608 }
@@ -649,8 +643,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
649 struct pci_dev *parent = pdev->bus->self; 643 struct pci_dev *parent = pdev->bus->self;
650 struct pcie_link_state *link, *root, *parent_link; 644 struct pcie_link_state *link, *root, *parent_link;
651 645
652 if ((aspm_disabled && !aspm_clear_state) || !pci_is_pcie(pdev) || 646 if (!pci_is_pcie(pdev) || !parent || !parent->link_state)
653 !parent || !parent->link_state)
654 return; 647 return;
655 if ((parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT) && 648 if ((parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT) &&
656 (parent->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)) 649 (parent->pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
@@ -734,13 +727,18 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
734 * pci_disable_link_state - disable pci device's link state, so the link will 727 * pci_disable_link_state - disable pci device's link state, so the link will
735 * never enter specific states 728 * never enter specific states
736 */ 729 */
737static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) 730static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem,
731 bool force)
738{ 732{
739 struct pci_dev *parent = pdev->bus->self; 733 struct pci_dev *parent = pdev->bus->self;
740 struct pcie_link_state *link; 734 struct pcie_link_state *link;
741 735
742 if (aspm_disabled || !pci_is_pcie(pdev)) 736 if (aspm_disabled && !force)
737 return;
738
739 if (!pci_is_pcie(pdev))
743 return; 740 return;
741
744 if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT || 742 if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
745 pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM) 743 pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)
746 parent = pdev; 744 parent = pdev;
@@ -768,16 +766,31 @@ static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
768 766
769void pci_disable_link_state_locked(struct pci_dev *pdev, int state) 767void pci_disable_link_state_locked(struct pci_dev *pdev, int state)
770{ 768{
771 __pci_disable_link_state(pdev, state, false); 769 __pci_disable_link_state(pdev, state, false, false);
772} 770}
773EXPORT_SYMBOL(pci_disable_link_state_locked); 771EXPORT_SYMBOL(pci_disable_link_state_locked);
774 772
775void pci_disable_link_state(struct pci_dev *pdev, int state) 773void pci_disable_link_state(struct pci_dev *pdev, int state)
776{ 774{
777 __pci_disable_link_state(pdev, state, true); 775 __pci_disable_link_state(pdev, state, true, false);
778} 776}
779EXPORT_SYMBOL(pci_disable_link_state); 777EXPORT_SYMBOL(pci_disable_link_state);
780 778
779void pcie_clear_aspm(struct pci_bus *bus)
780{
781 struct pci_dev *child;
782
783 /*
784 * Clear any ASPM setup that the firmware has carried out on this bus
785 */
786 list_for_each_entry(child, &bus->devices, bus_list) {
787 __pci_disable_link_state(child, PCIE_LINK_STATE_L0S |
788 PCIE_LINK_STATE_L1 |
789 PCIE_LINK_STATE_CLKPM,
790 false, true);
791 }
792}
793
781static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp) 794static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
782{ 795{
783 int i; 796 int i;
@@ -935,6 +948,7 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
935static int __init pcie_aspm_disable(char *str) 948static int __init pcie_aspm_disable(char *str)
936{ 949{
937 if (!strcmp(str, "off")) { 950 if (!strcmp(str, "off")) {
951 aspm_policy = POLICY_DEFAULT;
938 aspm_disabled = 1; 952 aspm_disabled = 1;
939 aspm_support_enabled = false; 953 aspm_support_enabled = false;
940 printk(KERN_INFO "PCIe ASPM is disabled\n"); 954 printk(KERN_INFO "PCIe ASPM is disabled\n");
@@ -947,16 +961,18 @@ static int __init pcie_aspm_disable(char *str)
947 961
948__setup("pcie_aspm=", pcie_aspm_disable); 962__setup("pcie_aspm=", pcie_aspm_disable);
949 963
950void pcie_clear_aspm(void)
951{
952 if (!aspm_force)
953 aspm_clear_state = 1;
954}
955
956void pcie_no_aspm(void) 964void pcie_no_aspm(void)
957{ 965{
958 if (!aspm_force) 966 /*
967 * Disabling ASPM is intended to prevent the kernel from modifying
968 * existing hardware state, not to clear existing state. To that end:
969 * (a) set policy to POLICY_DEFAULT in order to avoid changing state
970 * (b) prevent userspace from changing policy
971 */
972 if (!aspm_force) {
973 aspm_policy = POLICY_DEFAULT;
959 aspm_disabled = 1; 974 aspm_disabled = 1;
975 }
960} 976}
961 977
962/** 978/**