-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 12
-rw-r--r--  Documentation/devicetree/bindings/iommu/arm,smmu.txt | 43
-rw-r--r--  Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt | 2
-rw-r--r--  arch/powerpc/kernel/eeh.c | 2
-rw-r--r--  arch/powerpc/kernel/iommu.c | 4
-rw-r--r--  arch/x86/kernel/tboot.c | 2
-rw-r--r--  drivers/acpi/arm64/iort.c | 23
-rw-r--r--  drivers/dma/sh/rcar-dmac.c | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c | 2
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2
-rw-r--r--  drivers/iommu/amd_iommu.c | 275
-rw-r--r--  drivers/iommu/amd_iommu_init.c | 64
-rw-r--r--  drivers/iommu/amd_iommu_types.h | 1
-rw-r--r--  drivers/iommu/amd_iommu_v2.c | 2
-rw-r--r--  drivers/iommu/arm-smmu-v3.c | 63
-rw-r--r--  drivers/iommu/arm-smmu.c | 209
-rw-r--r--  drivers/iommu/dma-iommu.c | 22
-rw-r--r--  drivers/iommu/dmar.c | 91
-rw-r--r--  drivers/iommu/intel-iommu.c | 351
-rw-r--r--  drivers/iommu/intel-pasid.c | 449
-rw-r--r--  drivers/iommu/intel-pasid.h | 40
-rw-r--r--  drivers/iommu/intel-svm.c | 171
-rw-r--r--  drivers/iommu/intel_irq_remapping.c | 6
-rw-r--r--  drivers/iommu/io-pgtable-arm-v7s.c | 4
-rw-r--r--  drivers/iommu/iommu-sysfs.c | 14
-rw-r--r--  drivers/iommu/iommu.c | 113
-rw-r--r--  drivers/iommu/ipmmu-vmsa.c | 88
-rw-r--r--  drivers/iommu/irq_remapping.c | 1
-rw-r--r--  drivers/iommu/msm_iommu.c | 13
-rw-r--r--  drivers/iommu/mtk_iommu.c | 25
-rw-r--r--  drivers/iommu/mtk_iommu_v1.c | 47
-rw-r--r--  drivers/iommu/of_iommu.c | 16
-rw-r--r--  drivers/iommu/omap-iommu-debug.c | 25
-rw-r--r--  drivers/iommu/qcom_iommu.c | 34
-rw-r--r--  drivers/iommu/rockchip-iommu.c | 13
-rw-r--r--  drivers/iommu/tegra-gart.c | 37
-rw-r--r--  drivers/iommu/tegra-smmu.c | 26
-rw-r--r--  drivers/misc/mic/scif/scif_rma.c | 2
-rw-r--r--  drivers/misc/mic/scif/scif_rma.h | 2
-rw-r--r--  drivers/usb/host/xhci.c | 2
-rw-r--r--  drivers/vfio/vfio_iommu_type1.c | 33
-rw-r--r--  include/linux/device.h | 10
-rw-r--r--  include/linux/dma_remapping.h | 58
-rw-r--r--  include/linux/intel-iommu.h | 108
-rw-r--r--  include/linux/iommu.h | 18
46 files changed, 1612 insertions, 917 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 37e235be1d35..408781ee142c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1690,12 +1690,12 @@
1690 By default, super page will be supported if Intel IOMMU 1690 By default, super page will be supported if Intel IOMMU
1691 has the capability. With this option, super page will 1691 has the capability. With this option, super page will
1692 not be supported. 1692 not be supported.
1693 ecs_off [Default Off] 1693 sm_off [Default Off]
1694 By default, extended context tables will be supported if 1694 By default, scalable mode will be supported if the
1695 the hardware advertises that it has support both for the 1695 hardware advertises that it has support for the scalable
1696 extended tables themselves, and also PASID support. With 1696 mode translation. With this option set, scalable mode
1697 this option set, extended tables will not be used even 1697 will not be used even on hardware which claims to support
1698 on hardware which claims to support them. 1698 it.
1699 tboot_noforce [Default Off] 1699 tboot_noforce [Default Off]
1700 Do not force the Intel IOMMU enabled under tboot. 1700 Do not force the Intel IOMMU enabled under tboot.
1701 By default, tboot will force Intel IOMMU on, which 1701 By default, tboot will force Intel IOMMU on, which
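
    Usage note (illustrative, not part of the patch): with this rename,
    scalable-mode translation is opted out of from the kernel command
    line instead of the old ecs_off switch. The intel_iommu= parameter
    takes a comma-separated list of sub-options, so an existing setup
    would boot with something like:

        intel_iommu=on,sm_off
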
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 8a6ffce12af5..3133f3ba7567 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -17,10 +17,20 @@ conditions.
17 "arm,mmu-401" 17 "arm,mmu-401"
18 "arm,mmu-500" 18 "arm,mmu-500"
19 "cavium,smmu-v2" 19 "cavium,smmu-v2"
20 "qcom,smmu-v2"
20 21
21 depending on the particular implementation and/or the 22 depending on the particular implementation and/or the
22 version of the architecture implemented. 23 version of the architecture implemented.
23 24
25 Qcom SoCs must contain, as below, SoC-specific compatibles
26 along with "qcom,smmu-v2":
27 "qcom,msm8996-smmu-v2", "qcom,smmu-v2",
28 "qcom,sdm845-smmu-v2", "qcom,smmu-v2".
29
30 Qcom SoCs implementing "arm,mmu-500" must also include,
31 as below, SoC-specific compatibles:
32 "qcom,sdm845-smmu-500", "arm,mmu-500"
33
24- reg : Base address and size of the SMMU. 34- reg : Base address and size of the SMMU.
25 35
26- #global-interrupts : The number of global interrupts exposed by the 36- #global-interrupts : The number of global interrupts exposed by the
@@ -71,6 +81,22 @@ conditions.
71 or using stream matching with #iommu-cells = <2>, and 81 or using stream matching with #iommu-cells = <2>, and
72 may be ignored if present in such cases. 82 may be ignored if present in such cases.
73 83
84- clock-names: List of the names of clocks input to the device. The
85 required list depends on particular implementation and
86 is as follows:
87 - for "qcom,smmu-v2":
88 - "bus": clock required for downstream bus access and
89 for the smmu ptw,
90 - "iface": clock required to access smmu's registers
91 through the TCU's programming interface.
92 - unspecified for other implementations.
93
94- clocks: Specifiers for all clocks listed in the clock-names property,
95 as per generic clock bindings.
96
97- power-domains: Specifiers for power domains required to be powered on for
98 the SMMU to operate, as per generic power domain bindings.
99
74** Deprecated properties: 100** Deprecated properties:
75 101
76- mmu-masters (deprecated in favour of the generic "iommus" binding) : 102- mmu-masters (deprecated in favour of the generic "iommus" binding) :
@@ -137,3 +163,20 @@ conditions.
137 iommu-map = <0 &smmu3 0 0x400>; 163 iommu-map = <0 &smmu3 0 0x400>;
138 ... 164 ...
139 }; 165 };
166
167 /* Qcom's arm,smmu-v2 implementation */
168 smmu4: iommu@d00000 {
169 compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2";
170 reg = <0xd00000 0x10000>;
171
172 #global-interrupts = <1>;
173 interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>,
174 <GIC_SPI 320 IRQ_TYPE_LEVEL_HIGH>,
175 <GIC_SPI 321 IRQ_TYPE_LEVEL_HIGH>;
176 #iommu-cells = <1>;
177 power-domains = <&mmcc MDSS_GDSC>;
178
179 clocks = <&mmcc SMMU_MDP_AXI_CLK>,
180 <&mmcc SMMU_MDP_AHB_CLK>;
181 clock-names = "bus", "iface";
182 };
diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
index 377ee639d103..b6bfbec3a849 100644
--- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
+++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
@@ -14,6 +14,8 @@ Required Properties:
14 - "renesas,ipmmu-r8a7743" for the R8A7743 (RZ/G1M) IPMMU. 14 - "renesas,ipmmu-r8a7743" for the R8A7743 (RZ/G1M) IPMMU.
15 - "renesas,ipmmu-r8a7744" for the R8A7744 (RZ/G1N) IPMMU. 15 - "renesas,ipmmu-r8a7744" for the R8A7744 (RZ/G1N) IPMMU.
16 - "renesas,ipmmu-r8a7745" for the R8A7745 (RZ/G1E) IPMMU. 16 - "renesas,ipmmu-r8a7745" for the R8A7745 (RZ/G1E) IPMMU.
17 - "renesas,ipmmu-r8a774a1" for the R8A774A1 (RZ/G2M) IPMMU.
18 - "renesas,ipmmu-r8a774c0" for the R8A774C0 (RZ/G2E) IPMMU.
17 - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU. 19 - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU.
18 - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU. 20 - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU.
19 - "renesas,ipmmu-r8a7793" for the R8A7793 (R-Car M2-N) IPMMU. 21 - "renesas,ipmmu-r8a7793" for the R8A7793 (R-Car M2-N) IPMMU.
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 3230137469ab..ae05203eb4de 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1472,7 +1472,7 @@ static int dev_has_iommu_table(struct device *dev, void *data)
1472 if (!dev) 1472 if (!dev)
1473 return 0; 1473 return 0;
1474 1474
1475 if (dev->iommu_group) { 1475 if (device_iommu_mapped(dev)) {
1476 *ppdev = pdev; 1476 *ppdev = pdev;
1477 return 1; 1477 return 1;
1478 } 1478 }
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index d0625480b59e..33bbd59cff79 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1088,7 +1088,7 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
1088 if (!device_is_registered(dev)) 1088 if (!device_is_registered(dev))
1089 return -ENOENT; 1089 return -ENOENT;
1090 1090
1091 if (dev->iommu_group) { 1091 if (device_iommu_mapped(dev)) {
1092 pr_debug("%s: Skipping device %s with iommu group %d\n", 1092 pr_debug("%s: Skipping device %s with iommu group %d\n",
1093 __func__, dev_name(dev), 1093 __func__, dev_name(dev),
1094 iommu_group_id(dev->iommu_group)); 1094 iommu_group_id(dev->iommu_group));
@@ -1109,7 +1109,7 @@ void iommu_del_device(struct device *dev)
1109 * and we needn't detach them from the associated 1109 * and we needn't detach them from the associated
1110 * IOMMU groups 1110 * IOMMU groups
1111 */ 1111 */
1112 if (!dev->iommu_group) { 1112 if (!device_iommu_mapped(dev)) {
1113 pr_debug("iommu_tce: skipping device %s with no tbl\n", 1113 pr_debug("iommu_tce: skipping device %s with no tbl\n",
1114 dev_name(dev)); 1114 dev_name(dev));
1115 return; 1115 return;
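
    Note (illustrative, not part of the patch): the device_iommu_mapped()
    helper used in the conversions above is introduced by the
    include/linux/device.h change listed in the diffstat but not shown in
    this excerpt. The call sites treat it as a plain predicate on the
    device, so a minimal sketch, assuming it simply wraps the old
    dev->iommu_group test, would be:

        /* Sketch only; the real definition is added to include/linux/device.h. */
        static inline bool device_iommu_mapped(struct device *dev)
        {
                return dev->iommu_group != NULL;
        }
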
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index a2486f444073..6e5ef8fb8a02 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -19,7 +19,7 @@
19 * 19 *
20 */ 20 */
21 21
22#include <linux/dma_remapping.h> 22#include <linux/intel-iommu.h>
23#include <linux/init_task.h> 23#include <linux/init_task.h>
24#include <linux/spinlock.h> 24#include <linux/spinlock.h>
25#include <linux/export.h> 25#include <linux/export.h>
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 2159ad9bf9ed..fdd90ffceb85 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -779,7 +779,7 @@ static inline bool iort_iommu_driver_enabled(u8 type)
779static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev) 779static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev)
780{ 780{
781 struct acpi_iort_node *iommu; 781 struct acpi_iort_node *iommu;
782 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 782 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
783 783
784 iommu = iort_get_iort_node(fwspec->iommu_fwnode); 784 iommu = iort_get_iort_node(fwspec->iommu_fwnode);
785 785
@@ -794,9 +794,10 @@ static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev)
794 return NULL; 794 return NULL;
795} 795}
796 796
797static inline const struct iommu_ops *iort_fwspec_iommu_ops( 797static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev)
798 struct iommu_fwspec *fwspec)
799{ 798{
799 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
800
800 return (fwspec && fwspec->ops) ? fwspec->ops : NULL; 801 return (fwspec && fwspec->ops) ? fwspec->ops : NULL;
801} 802}
802 803
@@ -805,8 +806,8 @@ static inline int iort_add_device_replay(const struct iommu_ops *ops,
805{ 806{
806 int err = 0; 807 int err = 0;
807 808
808 if (ops->add_device && dev->bus && !dev->iommu_group) 809 if (dev->bus && !device_iommu_mapped(dev))
809 err = ops->add_device(dev); 810 err = iommu_probe_device(dev);
810 811
811 return err; 812 return err;
812} 813}
@@ -824,6 +825,7 @@ static inline int iort_add_device_replay(const struct iommu_ops *ops,
824 */ 825 */
825int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) 826int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
826{ 827{
828 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
827 struct acpi_iort_its_group *its; 829 struct acpi_iort_its_group *its;
828 struct acpi_iort_node *iommu_node, *its_node = NULL; 830 struct acpi_iort_node *iommu_node, *its_node = NULL;
829 int i, resv = 0; 831 int i, resv = 0;
@@ -841,9 +843,9 @@ int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
841 * a given PCI or named component may map IDs to. 843 * a given PCI or named component may map IDs to.
842 */ 844 */
843 845
844 for (i = 0; i < dev->iommu_fwspec->num_ids; i++) { 846 for (i = 0; i < fwspec->num_ids; i++) {
845 its_node = iort_node_map_id(iommu_node, 847 its_node = iort_node_map_id(iommu_node,
846 dev->iommu_fwspec->ids[i], 848 fwspec->ids[i],
847 NULL, IORT_MSI_TYPE); 849 NULL, IORT_MSI_TYPE);
848 if (its_node) 850 if (its_node)
849 break; 851 break;
@@ -874,8 +876,7 @@ int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
874 return (resv == its->its_count) ? resv : -ENODEV; 876 return (resv == its->its_count) ? resv : -ENODEV;
875} 877}
876#else 878#else
877static inline const struct iommu_ops *iort_fwspec_iommu_ops( 879static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev)
878 struct iommu_fwspec *fwspec)
879{ return NULL; } 880{ return NULL; }
880static inline int iort_add_device_replay(const struct iommu_ops *ops, 881static inline int iort_add_device_replay(const struct iommu_ops *ops,
881 struct device *dev) 882 struct device *dev)
@@ -1045,7 +1046,7 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
1045 * If we already translated the fwspec there 1046 * If we already translated the fwspec there
1046 * is nothing left to do, return the iommu_ops. 1047 * is nothing left to do, return the iommu_ops.
1047 */ 1048 */
1048 ops = iort_fwspec_iommu_ops(dev->iommu_fwspec); 1049 ops = iort_fwspec_iommu_ops(dev);
1049 if (ops) 1050 if (ops)
1050 return ops; 1051 return ops;
1051 1052
@@ -1084,7 +1085,7 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
1084 * add_device callback for dev, replay it to get things in order. 1085 * add_device callback for dev, replay it to get things in order.
1085 */ 1086 */
1086 if (!err) { 1087 if (!err) {
1087 ops = iort_fwspec_iommu_ops(dev->iommu_fwspec); 1088 ops = iort_fwspec_iommu_ops(dev);
1088 err = iort_add_device_replay(ops, dev); 1089 err = iort_add_device_replay(ops, dev);
1089 } 1090 }
1090 1091
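
    Note (illustrative, not part of the patch): dev_iommu_fwspec_get(),
    used throughout the iort.c conversion above, comes from the
    include/linux/iommu.h change in the diffstat and is not shown here.
    A minimal sketch, assuming it is a trivial accessor for the existing
    per-device fwspec pointer:

        /* Sketch only; the real accessor is added to include/linux/iommu.h. */
        static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
        {
                return dev->iommu_fwspec;
        }
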
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 74fa2b1a6a86..2b4f25698169 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1809,7 +1809,7 @@ static int rcar_dmac_probe(struct platform_device *pdev)
1809 * level we can't disable it selectively, so ignore channel 0 for now if 1809 * level we can't disable it selectively, so ignore channel 0 for now if
1810 * the device is part of an IOMMU group. 1810 * the device is part of an IOMMU group.
1811 */ 1811 */
1812 if (pdev->dev.iommu_group) { 1812 if (device_iommu_mapped(&pdev->dev)) {
1813 dmac->n_channels--; 1813 dmac->n_channels--;
1814 channels_offset = 1; 1814 channels_offset = 1;
1815 } 1815 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 786d719e652d..8ff6b581cf1c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -26,7 +26,7 @@
26 * 26 *
27 */ 27 */
28 28
29#include <linux/dma_remapping.h> 29#include <linux/intel-iommu.h>
30#include <linux/reservation.h> 30#include <linux/reservation.h>
31#include <linux/sync_file.h> 31#include <linux/sync_file.h>
32#include <linux/uaccess.h> 32#include <linux/uaccess.h>
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 07c861884c70..3da9c0f9e948 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -46,7 +46,7 @@
46#include <drm/drm_plane_helper.h> 46#include <drm/drm_plane_helper.h>
47#include <drm/drm_rect.h> 47#include <drm/drm_rect.h>
48#include <drm/drm_atomic_uapi.h> 48#include <drm/drm_atomic_uapi.h>
49#include <linux/dma_remapping.h> 49#include <linux/intel-iommu.h>
50#include <linux/reservation.h> 50#include <linux/reservation.h>
51 51
52/* Primary plane formats for gen <= 3 */ 52/* Primary plane formats for gen <= 3 */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 9fd5fbe8bebf..25afb1d594e3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -34,7 +34,7 @@
34#include <drm/ttm/ttm_placement.h> 34#include <drm/ttm/ttm_placement.h>
35#include <drm/ttm/ttm_bo_driver.h> 35#include <drm/ttm/ttm_bo_driver.h>
36#include <drm/ttm/ttm_module.h> 36#include <drm/ttm/ttm_module.h>
37#include <linux/dma_remapping.h> 37#include <linux/intel-iommu.h>
38 38
39#define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices" 39#define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices"
40#define VMWGFX_CHIP_SVGAII 0 40#define VMWGFX_CHIP_SVGAII 0
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 567221cca13c..87ba23a75b38 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -17,6 +17,8 @@
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
20#define pr_fmt(fmt) "AMD-Vi: " fmt
21
20#include <linux/ratelimit.h> 22#include <linux/ratelimit.h>
21#include <linux/pci.h> 23#include <linux/pci.h>
22#include <linux/acpi.h> 24#include <linux/acpi.h>
@@ -277,7 +279,7 @@ static u16 get_alias(struct device *dev)
277 return pci_alias; 279 return pci_alias;
278 } 280 }
279 281
280 pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d " 282 pr_info("Using IVRS reported alias %02x:%02x.%d "
281 "for device %s[%04x:%04x], kernel reported alias " 283 "for device %s[%04x:%04x], kernel reported alias "
282 "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), 284 "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias),
283 PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, 285 PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device,
@@ -291,7 +293,7 @@ static u16 get_alias(struct device *dev)
291 if (pci_alias == devid && 293 if (pci_alias == devid &&
292 PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { 294 PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) {
293 pci_add_dma_alias(pdev, ivrs_alias & 0xff); 295 pci_add_dma_alias(pdev, ivrs_alias & 0xff);
294 pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n", 296 pr_info("Added PCI DMA alias %02x.%d for %s\n",
295 PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), 297 PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias),
296 dev_name(dev)); 298 dev_name(dev));
297 } 299 }
@@ -436,7 +438,14 @@ static int iommu_init_device(struct device *dev)
436 438
437 dev_data->alias = get_alias(dev); 439 dev_data->alias = get_alias(dev);
438 440
439 if (dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) { 441 /*
442 * By default we use passthrough mode for IOMMUv2 capable device.
443 * But if amd_iommu=force_isolation is set (e.g. to debug DMA to
444 * invalid address), we ignore the capability for the device so
445 * it'll be forced to go into translation mode.
446 */
447 if ((iommu_pass_through || !amd_iommu_force_isolation) &&
448 dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) {
440 struct amd_iommu *iommu; 449 struct amd_iommu *iommu;
441 450
442 iommu = amd_iommu_rlookup_table[dev_data->devid]; 451 iommu = amd_iommu_rlookup_table[dev_data->devid];
@@ -511,7 +520,7 @@ static void dump_dte_entry(u16 devid)
511 int i; 520 int i;
512 521
513 for (i = 0; i < 4; ++i) 522 for (i = 0; i < 4; ++i)
514 pr_err("AMD-Vi: DTE[%d]: %016llx\n", i, 523 pr_err("DTE[%d]: %016llx\n", i,
515 amd_iommu_dev_table[devid].data[i]); 524 amd_iommu_dev_table[devid].data[i]);
516} 525}
517 526
@@ -521,7 +530,7 @@ static void dump_command(unsigned long phys_addr)
521 int i; 530 int i;
522 531
523 for (i = 0; i < 4; ++i) 532 for (i = 0; i < 4; ++i)
524 pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); 533 pr_err("CMD[%d]: %08x\n", i, cmd->data[i]);
525} 534}
526 535
527static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, 536static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
@@ -536,10 +545,10 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
536 dev_data = get_dev_data(&pdev->dev); 545 dev_data = get_dev_data(&pdev->dev);
537 546
538 if (dev_data && __ratelimit(&dev_data->rs)) { 547 if (dev_data && __ratelimit(&dev_data->rs)) {
539 dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n", 548 dev_err(&pdev->dev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
540 domain_id, address, flags); 549 domain_id, address, flags);
541 } else if (printk_ratelimit()) { 550 } else if (printk_ratelimit()) {
542 pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n", 551 pr_err("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
543 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), 552 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
544 domain_id, address, flags); 553 domain_id, address, flags);
545 } 554 }
@@ -566,7 +575,7 @@ retry:
566 if (type == 0) { 575 if (type == 0) {
567 /* Did we hit the erratum? */ 576 /* Did we hit the erratum? */
568 if (++count == LOOP_TIMEOUT) { 577 if (++count == LOOP_TIMEOUT) {
569 pr_err("AMD-Vi: No event written to event log\n"); 578 pr_err("No event written to event log\n");
570 return; 579 return;
571 } 580 }
572 udelay(1); 581 udelay(1);
@@ -576,43 +585,41 @@ retry:
576 if (type == EVENT_TYPE_IO_FAULT) { 585 if (type == EVENT_TYPE_IO_FAULT) {
577 amd_iommu_report_page_fault(devid, pasid, address, flags); 586 amd_iommu_report_page_fault(devid, pasid, address, flags);
578 return; 587 return;
579 } else {
580 dev_err(dev, "AMD-Vi: Event logged [");
581 } 588 }
582 589
583 switch (type) { 590 switch (type) {
584 case EVENT_TYPE_ILL_DEV: 591 case EVENT_TYPE_ILL_DEV:
585 dev_err(dev, "ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n", 592 dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
586 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), 593 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
587 pasid, address, flags); 594 pasid, address, flags);
588 dump_dte_entry(devid); 595 dump_dte_entry(devid);
589 break; 596 break;
590 case EVENT_TYPE_DEV_TAB_ERR: 597 case EVENT_TYPE_DEV_TAB_ERR:
591 dev_err(dev, "DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " 598 dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
592 "address=0x%016llx flags=0x%04x]\n", 599 "address=0x%llx flags=0x%04x]\n",
593 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), 600 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
594 address, flags); 601 address, flags);
595 break; 602 break;
596 case EVENT_TYPE_PAGE_TAB_ERR: 603 case EVENT_TYPE_PAGE_TAB_ERR:
597 dev_err(dev, "PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n", 604 dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
598 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), 605 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
599 pasid, address, flags); 606 pasid, address, flags);
600 break; 607 break;
601 case EVENT_TYPE_ILL_CMD: 608 case EVENT_TYPE_ILL_CMD:
602 dev_err(dev, "ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); 609 dev_err(dev, "Event logged [ILLEGAL_COMMAND_ERROR address=0x%llx]\n", address);
603 dump_command(address); 610 dump_command(address);
604 break; 611 break;
605 case EVENT_TYPE_CMD_HARD_ERR: 612 case EVENT_TYPE_CMD_HARD_ERR:
606 dev_err(dev, "COMMAND_HARDWARE_ERROR address=0x%016llx flags=0x%04x]\n", 613 dev_err(dev, "Event logged [COMMAND_HARDWARE_ERROR address=0x%llx flags=0x%04x]\n",
607 address, flags); 614 address, flags);
608 break; 615 break;
609 case EVENT_TYPE_IOTLB_INV_TO: 616 case EVENT_TYPE_IOTLB_INV_TO:
610 dev_err(dev, "IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%016llx]\n", 617 dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%llx]\n",
611 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), 618 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
612 address); 619 address);
613 break; 620 break;
614 case EVENT_TYPE_INV_DEV_REQ: 621 case EVENT_TYPE_INV_DEV_REQ:
615 dev_err(dev, "INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n", 622 dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
616 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), 623 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
617 pasid, address, flags); 624 pasid, address, flags);
618 break; 625 break;
@@ -620,12 +627,12 @@ retry:
620 pasid = ((event[0] >> 16) & 0xFFFF) 627 pasid = ((event[0] >> 16) & 0xFFFF)
621 | ((event[1] << 6) & 0xF0000); 628 | ((event[1] << 6) & 0xF0000);
622 tag = event[1] & 0x03FF; 629 tag = event[1] & 0x03FF;
623 dev_err(dev, "INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n", 630 dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
624 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), 631 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
625 pasid, address, flags); 632 pasid, address, flags);
626 break; 633 break;
627 default: 634 default:
628 dev_err(dev, "UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n", 635 dev_err(dev, "Event logged [UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n",
629 event[0], event[1], event[2], event[3]); 636 event[0], event[1], event[2], event[3]);
630 } 637 }
631 638
@@ -652,7 +659,7 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
652 struct amd_iommu_fault fault; 659 struct amd_iommu_fault fault;
653 660
654 if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { 661 if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
655 pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n"); 662 pr_err_ratelimited("Unknown PPR request received\n");
656 return; 663 return;
657 } 664 }
658 665
@@ -757,12 +764,12 @@ static void iommu_poll_ga_log(struct amd_iommu *iommu)
757 if (!iommu_ga_log_notifier) 764 if (!iommu_ga_log_notifier)
758 break; 765 break;
759 766
760 pr_debug("AMD-Vi: %s: devid=%#x, ga_tag=%#x\n", 767 pr_debug("%s: devid=%#x, ga_tag=%#x\n",
761 __func__, GA_DEVID(log_entry), 768 __func__, GA_DEVID(log_entry),
762 GA_TAG(log_entry)); 769 GA_TAG(log_entry));
763 770
764 if (iommu_ga_log_notifier(GA_TAG(log_entry)) != 0) 771 if (iommu_ga_log_notifier(GA_TAG(log_entry)) != 0)
765 pr_err("AMD-Vi: GA log notifier failed.\n"); 772 pr_err("GA log notifier failed.\n");
766 break; 773 break;
767 default: 774 default:
768 break; 775 break;
@@ -787,18 +794,18 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data)
787 iommu->mmio_base + MMIO_STATUS_OFFSET); 794 iommu->mmio_base + MMIO_STATUS_OFFSET);
788 795
789 if (status & MMIO_STATUS_EVT_INT_MASK) { 796 if (status & MMIO_STATUS_EVT_INT_MASK) {
790 pr_devel("AMD-Vi: Processing IOMMU Event Log\n"); 797 pr_devel("Processing IOMMU Event Log\n");
791 iommu_poll_events(iommu); 798 iommu_poll_events(iommu);
792 } 799 }
793 800
794 if (status & MMIO_STATUS_PPR_INT_MASK) { 801 if (status & MMIO_STATUS_PPR_INT_MASK) {
795 pr_devel("AMD-Vi: Processing IOMMU PPR Log\n"); 802 pr_devel("Processing IOMMU PPR Log\n");
796 iommu_poll_ppr_log(iommu); 803 iommu_poll_ppr_log(iommu);
797 } 804 }
798 805
799#ifdef CONFIG_IRQ_REMAP 806#ifdef CONFIG_IRQ_REMAP
800 if (status & MMIO_STATUS_GALOG_INT_MASK) { 807 if (status & MMIO_STATUS_GALOG_INT_MASK) {
801 pr_devel("AMD-Vi: Processing IOMMU GA Log\n"); 808 pr_devel("Processing IOMMU GA Log\n");
802 iommu_poll_ga_log(iommu); 809 iommu_poll_ga_log(iommu);
803 } 810 }
804#endif 811#endif
@@ -842,7 +849,7 @@ static int wait_on_sem(volatile u64 *sem)
842 } 849 }
843 850
844 if (i == LOOP_TIMEOUT) { 851 if (i == LOOP_TIMEOUT) {
845 pr_alert("AMD-Vi: Completion-Wait loop timed out\n"); 852 pr_alert("Completion-Wait loop timed out\n");
846 return -EIO; 853 return -EIO;
847 } 854 }
848 855
@@ -1034,7 +1041,7 @@ again:
1034 /* Skip udelay() the first time around */ 1041 /* Skip udelay() the first time around */
1035 if (count++) { 1042 if (count++) {
1036 if (count == LOOP_TIMEOUT) { 1043 if (count == LOOP_TIMEOUT) {
1037 pr_err("AMD-Vi: Command buffer timeout\n"); 1044 pr_err("Command buffer timeout\n");
1038 return -EIO; 1045 return -EIO;
1039 } 1046 }
1040 1047
@@ -1315,6 +1322,101 @@ static void domain_flush_devices(struct protection_domain *domain)
1315 * 1322 *
1316 ****************************************************************************/ 1323 ****************************************************************************/
1317 1324
1325static void free_page_list(struct page *freelist)
1326{
1327 while (freelist != NULL) {
1328 unsigned long p = (unsigned long)page_address(freelist);
1329 freelist = freelist->freelist;
1330 free_page(p);
1331 }
1332}
1333
1334static struct page *free_pt_page(unsigned long pt, struct page *freelist)
1335{
1336 struct page *p = virt_to_page((void *)pt);
1337
1338 p->freelist = freelist;
1339
1340 return p;
1341}
1342
1343#define DEFINE_FREE_PT_FN(LVL, FN) \
1344static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \
1345{ \
1346 unsigned long p; \
1347 u64 *pt; \
1348 int i; \
1349 \
1350 pt = (u64 *)__pt; \
1351 \
1352 for (i = 0; i < 512; ++i) { \
1353 /* PTE present? */ \
1354 if (!IOMMU_PTE_PRESENT(pt[i])) \
1355 continue; \
1356 \
1357 /* Large PTE? */ \
1358 if (PM_PTE_LEVEL(pt[i]) == 0 || \
1359 PM_PTE_LEVEL(pt[i]) == 7) \
1360 continue; \
1361 \
1362 p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \
1363 freelist = FN(p, freelist); \
1364 } \
1365 \
1366 return free_pt_page((unsigned long)pt, freelist); \
1367}
1368
1369DEFINE_FREE_PT_FN(l2, free_pt_page)
1370DEFINE_FREE_PT_FN(l3, free_pt_l2)
1371DEFINE_FREE_PT_FN(l4, free_pt_l3)
1372DEFINE_FREE_PT_FN(l5, free_pt_l4)
1373DEFINE_FREE_PT_FN(l6, free_pt_l5)
1374
1375static struct page *free_sub_pt(unsigned long root, int mode,
1376 struct page *freelist)
1377{
1378 switch (mode) {
1379 case PAGE_MODE_NONE:
1380 case PAGE_MODE_7_LEVEL:
1381 break;
1382 case PAGE_MODE_1_LEVEL:
1383 freelist = free_pt_page(root, freelist);
1384 break;
1385 case PAGE_MODE_2_LEVEL:
1386 freelist = free_pt_l2(root, freelist);
1387 break;
1388 case PAGE_MODE_3_LEVEL:
1389 freelist = free_pt_l3(root, freelist);
1390 break;
1391 case PAGE_MODE_4_LEVEL:
1392 freelist = free_pt_l4(root, freelist);
1393 break;
1394 case PAGE_MODE_5_LEVEL:
1395 freelist = free_pt_l5(root, freelist);
1396 break;
1397 case PAGE_MODE_6_LEVEL:
1398 freelist = free_pt_l6(root, freelist);
1399 break;
1400 default:
1401 BUG();
1402 }
1403
1404 return freelist;
1405}
1406
1407static void free_pagetable(struct protection_domain *domain)
1408{
1409 unsigned long root = (unsigned long)domain->pt_root;
1410 struct page *freelist = NULL;
1411
1412 BUG_ON(domain->mode < PAGE_MODE_NONE ||
1413 domain->mode > PAGE_MODE_6_LEVEL);
1414
 1415 freelist = free_sub_pt(root, domain->mode, freelist);
1416
1417 free_page_list(freelist);
1418}
1419
1318/* 1420/*
1319 * This function is used to add another level to an IO page table. Adding 1421 * This function is used to add another level to an IO page table. Adding
1320 * another level increases the size of the address space by 9 bits to a size up 1422 * another level increases the size of the address space by 9 bits to a size up
@@ -1363,10 +1465,13 @@ static u64 *alloc_pte(struct protection_domain *domain,
1363 1465
1364 while (level > end_lvl) { 1466 while (level > end_lvl) {
1365 u64 __pte, __npte; 1467 u64 __pte, __npte;
1468 int pte_level;
1366 1469
1367 __pte = *pte; 1470 __pte = *pte;
1471 pte_level = PM_PTE_LEVEL(__pte);
1368 1472
1369 if (!IOMMU_PTE_PRESENT(__pte)) { 1473 if (!IOMMU_PTE_PRESENT(__pte) ||
1474 pte_level == PAGE_MODE_7_LEVEL) {
1370 page = (u64 *)get_zeroed_page(gfp); 1475 page = (u64 *)get_zeroed_page(gfp);
1371 if (!page) 1476 if (!page)
1372 return NULL; 1477 return NULL;
@@ -1374,19 +1479,21 @@ static u64 *alloc_pte(struct protection_domain *domain,
1374 __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page)); 1479 __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));
1375 1480
1376 /* pte could have been changed somewhere. */ 1481 /* pte could have been changed somewhere. */
1377 if (cmpxchg64(pte, __pte, __npte) != __pte) { 1482 if (cmpxchg64(pte, __pte, __npte) != __pte)
1378 free_page((unsigned long)page); 1483 free_page((unsigned long)page);
1379 continue; 1484 else if (pte_level == PAGE_MODE_7_LEVEL)
1380 } 1485 domain->updated = true;
1486
1487 continue;
1381 } 1488 }
1382 1489
1383 /* No level skipping support yet */ 1490 /* No level skipping support yet */
1384 if (PM_PTE_LEVEL(*pte) != level) 1491 if (pte_level != level)
1385 return NULL; 1492 return NULL;
1386 1493
1387 level -= 1; 1494 level -= 1;
1388 1495
1389 pte = IOMMU_PTE_PAGE(*pte); 1496 pte = IOMMU_PTE_PAGE(__pte);
1390 1497
1391 if (pte_page && level == end_lvl) 1498 if (pte_page && level == end_lvl)
1392 *pte_page = pte; 1499 *pte_page = pte;
@@ -1455,6 +1562,25 @@ static u64 *fetch_pte(struct protection_domain *domain,
1455 return pte; 1562 return pte;
1456} 1563}
1457 1564
1565static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
1566{
1567 unsigned long pt;
1568 int mode;
1569
1570 while (cmpxchg64(pte, pteval, 0) != pteval) {
1571 pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");
1572 pteval = *pte;
1573 }
1574
1575 if (!IOMMU_PTE_PRESENT(pteval))
1576 return freelist;
1577
1578 pt = (unsigned long)IOMMU_PTE_PAGE(pteval);
1579 mode = IOMMU_PTE_MODE(pteval);
1580
1581 return free_sub_pt(pt, mode, freelist);
1582}
1583
1458/* 1584/*
1459 * Generic mapping functions. It maps a physical address into a DMA 1585 * Generic mapping functions. It maps a physical address into a DMA
1460 * address space. It allocates the page table pages if necessary. 1586 * address space. It allocates the page table pages if necessary.
@@ -1469,6 +1595,7 @@ static int iommu_map_page(struct protection_domain *dom,
1469 int prot, 1595 int prot,
1470 gfp_t gfp) 1596 gfp_t gfp)
1471{ 1597{
1598 struct page *freelist = NULL;
1472 u64 __pte, *pte; 1599 u64 __pte, *pte;
1473 int i, count; 1600 int i, count;
1474 1601
@@ -1485,8 +1612,10 @@ static int iommu_map_page(struct protection_domain *dom,
1485 return -ENOMEM; 1612 return -ENOMEM;
1486 1613
1487 for (i = 0; i < count; ++i) 1614 for (i = 0; i < count; ++i)
1488 if (IOMMU_PTE_PRESENT(pte[i])) 1615 freelist = free_clear_pte(&pte[i], pte[i], freelist);
1489 return -EBUSY; 1616
1617 if (freelist != NULL)
1618 dom->updated = true;
1490 1619
1491 if (count > 1) { 1620 if (count > 1) {
1492 __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); 1621 __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
@@ -1504,6 +1633,9 @@ static int iommu_map_page(struct protection_domain *dom,
1504 1633
1505 update_domain(dom); 1634 update_domain(dom);
1506 1635
1636 /* Everything flushed out, free pages now */
1637 free_page_list(freelist);
1638
1507 return 0; 1639 return 0;
1508} 1640}
1509 1641
@@ -1636,67 +1768,6 @@ static void domain_id_free(int id)
1636 spin_unlock(&pd_bitmap_lock); 1768 spin_unlock(&pd_bitmap_lock);
1637} 1769}
1638 1770
1639#define DEFINE_FREE_PT_FN(LVL, FN) \
1640static void free_pt_##LVL (unsigned long __pt) \
1641{ \
1642 unsigned long p; \
1643 u64 *pt; \
1644 int i; \
1645 \
1646 pt = (u64 *)__pt; \
1647 \
1648 for (i = 0; i < 512; ++i) { \
1649 /* PTE present? */ \
1650 if (!IOMMU_PTE_PRESENT(pt[i])) \
1651 continue; \
1652 \
1653 /* Large PTE? */ \
1654 if (PM_PTE_LEVEL(pt[i]) == 0 || \
1655 PM_PTE_LEVEL(pt[i]) == 7) \
1656 continue; \
1657 \
1658 p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \
1659 FN(p); \
1660 } \
1661 free_page((unsigned long)pt); \
1662}
1663
1664DEFINE_FREE_PT_FN(l2, free_page)
1665DEFINE_FREE_PT_FN(l3, free_pt_l2)
1666DEFINE_FREE_PT_FN(l4, free_pt_l3)
1667DEFINE_FREE_PT_FN(l5, free_pt_l4)
1668DEFINE_FREE_PT_FN(l6, free_pt_l5)
1669
1670static void free_pagetable(struct protection_domain *domain)
1671{
1672 unsigned long root = (unsigned long)domain->pt_root;
1673
1674 switch (domain->mode) {
1675 case PAGE_MODE_NONE:
1676 break;
1677 case PAGE_MODE_1_LEVEL:
1678 free_page(root);
1679 break;
1680 case PAGE_MODE_2_LEVEL:
1681 free_pt_l2(root);
1682 break;
1683 case PAGE_MODE_3_LEVEL:
1684 free_pt_l3(root);
1685 break;
1686 case PAGE_MODE_4_LEVEL:
1687 free_pt_l4(root);
1688 break;
1689 case PAGE_MODE_5_LEVEL:
1690 free_pt_l5(root);
1691 break;
1692 case PAGE_MODE_6_LEVEL:
1693 free_pt_l6(root);
1694 break;
1695 default:
1696 BUG();
1697 }
1698}
1699
1700static void free_gcr3_tbl_level1(u64 *tbl) 1771static void free_gcr3_tbl_level1(u64 *tbl)
1701{ 1772{
1702 u64 *ptr; 1773 u64 *ptr;
@@ -2771,9 +2842,9 @@ int __init amd_iommu_init_dma_ops(void)
2771 iommu_detected = 1; 2842 iommu_detected = 1;
2772 2843
2773 if (amd_iommu_unmap_flush) 2844 if (amd_iommu_unmap_flush)
2774 pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n"); 2845 pr_info("IO/TLB flush on unmap enabled\n");
2775 else 2846 else
2776 pr_info("AMD-Vi: Lazy IO/TLB flushing enabled\n"); 2847 pr_info("Lazy IO/TLB flushing enabled\n");
2777 2848
2778 return 0; 2849 return 0;
2779 2850
@@ -2878,7 +2949,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
2878 case IOMMU_DOMAIN_DMA: 2949 case IOMMU_DOMAIN_DMA:
2879 dma_domain = dma_ops_domain_alloc(); 2950 dma_domain = dma_ops_domain_alloc();
2880 if (!dma_domain) { 2951 if (!dma_domain) {
2881 pr_err("AMD-Vi: Failed to allocate\n"); 2952 pr_err("Failed to allocate\n");
2882 return NULL; 2953 return NULL;
2883 } 2954 }
2884 pdomain = &dma_domain->domain; 2955 pdomain = &dma_domain->domain;
@@ -4299,7 +4370,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
4299 * legacy mode. So, we force legacy mode instead. 4370 * legacy mode. So, we force legacy mode instead.
4300 */ 4371 */
4301 if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) { 4372 if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) {
4302 pr_debug("AMD-Vi: %s: Fall back to using intr legacy remap\n", 4373 pr_debug("%s: Fall back to using intr legacy remap\n",
4303 __func__); 4374 __func__);
4304 pi_data->is_guest_mode = false; 4375 pi_data->is_guest_mode = false;
4305 } 4376 }
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index d8f7000a466a..66123b911ec8 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -17,6 +17,8 @@
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
20#define pr_fmt(fmt) "AMD-Vi: " fmt
21
20#include <linux/pci.h> 22#include <linux/pci.h>
21#include <linux/acpi.h> 23#include <linux/acpi.h>
22#include <linux/list.h> 24#include <linux/list.h>
@@ -443,9 +445,9 @@ static void iommu_disable(struct amd_iommu *iommu)
443static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end) 445static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
444{ 446{
445 if (!request_mem_region(address, end, "amd_iommu")) { 447 if (!request_mem_region(address, end, "amd_iommu")) {
446 pr_err("AMD-Vi: Can not reserve memory region %llx-%llx for mmio\n", 448 pr_err("Can not reserve memory region %llx-%llx for mmio\n",
447 address, end); 449 address, end);
448 pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n"); 450 pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
449 return NULL; 451 return NULL;
450 } 452 }
451 453
@@ -512,7 +514,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
512 u32 ivhd_size = get_ivhd_header_size(h); 514 u32 ivhd_size = get_ivhd_header_size(h);
513 515
514 if (!ivhd_size) { 516 if (!ivhd_size) {
515 pr_err("AMD-Vi: Unsupported IVHD type %#x\n", h->type); 517 pr_err("Unsupported IVHD type %#x\n", h->type);
516 return -EINVAL; 518 return -EINVAL;
517 } 519 }
518 520
@@ -553,7 +555,7 @@ static int __init check_ivrs_checksum(struct acpi_table_header *table)
553 checksum += p[i]; 555 checksum += p[i];
554 if (checksum != 0) { 556 if (checksum != 0) {
555 /* ACPI table corrupt */ 557 /* ACPI table corrupt */
556 pr_err(FW_BUG "AMD-Vi: IVRS invalid checksum\n"); 558 pr_err(FW_BUG "IVRS invalid checksum\n");
557 return -ENODEV; 559 return -ENODEV;
558 } 560 }
559 561
@@ -1028,7 +1030,7 @@ static int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
1028 if (!(entry->id == id && entry->cmd_line)) 1030 if (!(entry->id == id && entry->cmd_line))
1029 continue; 1031 continue;
1030 1032
1031 pr_info("AMD-Vi: Command-line override present for %s id %d - ignoring\n", 1033 pr_info("Command-line override present for %s id %d - ignoring\n",
1032 type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id); 1034 type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1033 1035
1034 *devid = entry->devid; 1036 *devid = entry->devid;
@@ -1061,7 +1063,7 @@ static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid,
1061 !entry->cmd_line) 1063 !entry->cmd_line)
1062 continue; 1064 continue;
1063 1065
1064 pr_info("AMD-Vi: Command-line override for hid:%s uid:%s\n", 1066 pr_info("Command-line override for hid:%s uid:%s\n",
1065 hid, uid); 1067 hid, uid);
1066 *devid = entry->devid; 1068 *devid = entry->devid;
1067 return 0; 1069 return 0;
@@ -1077,7 +1079,7 @@ static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid,
1077 entry->cmd_line = cmd_line; 1079 entry->cmd_line = cmd_line;
1078 entry->root_devid = (entry->devid & (~0x7)); 1080 entry->root_devid = (entry->devid & (~0x7));
1079 1081
1080 pr_info("AMD-Vi:%s, add hid:%s, uid:%s, rdevid:%d\n", 1082 pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
1081 entry->cmd_line ? "cmd" : "ivrs", 1083 entry->cmd_line ? "cmd" : "ivrs",
1082 entry->hid, entry->uid, entry->root_devid); 1084 entry->hid, entry->uid, entry->root_devid);
1083 1085
@@ -1173,7 +1175,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1173 */ 1175 */
1174 ivhd_size = get_ivhd_header_size(h); 1176 ivhd_size = get_ivhd_header_size(h);
1175 if (!ivhd_size) { 1177 if (!ivhd_size) {
1176 pr_err("AMD-Vi: Unsupported IVHD type %#x\n", h->type); 1178 pr_err("Unsupported IVHD type %#x\n", h->type);
1177 return -EINVAL; 1179 return -EINVAL;
1178 } 1180 }
1179 1181
@@ -1455,7 +1457,7 @@ static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1455 pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8)); 1457 pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1456 1458
1457 pci_write_config_dword(iommu->dev, 0xf4, value | 0x4); 1459 pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1458 pr_info("AMD-Vi: Applying erratum 746 workaround for IOMMU at %s\n", 1460 pr_info("Applying erratum 746 workaround for IOMMU at %s\n",
1459 dev_name(&iommu->dev->dev)); 1461 dev_name(&iommu->dev->dev));
1460 1462
1461 /* Clear the enable writing bit */ 1463 /* Clear the enable writing bit */
@@ -1486,7 +1488,7 @@ static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1486 /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ 1488 /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1487 iommu_write_l2(iommu, 0x47, value | BIT(0)); 1489 iommu_write_l2(iommu, 0x47, value | BIT(0));
1488 1490
1489 pr_info("AMD-Vi: Applying ATS write check workaround for IOMMU at %s\n", 1491 pr_info("Applying ATS write check workaround for IOMMU at %s\n",
1490 dev_name(&iommu->dev->dev)); 1492 dev_name(&iommu->dev->dev));
1491} 1493}
1492 1494
@@ -1506,7 +1508,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
1506 iommu->index = amd_iommus_present++; 1508 iommu->index = amd_iommus_present++;
1507 1509
1508 if (unlikely(iommu->index >= MAX_IOMMUS)) { 1510 if (unlikely(iommu->index >= MAX_IOMMUS)) {
1509 WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); 1511 WARN(1, "System has more IOMMUs than supported by this driver\n");
1510 return -ENOSYS; 1512 return -ENOSYS;
1511 } 1513 }
1512 1514
@@ -1674,12 +1676,12 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1674 if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || 1676 if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
1675 (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) || 1677 (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
1676 (val != val2)) { 1678 (val != val2)) {
1677 pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n"); 1679 pr_err("Unable to write to IOMMU perf counter.\n");
1678 amd_iommu_pc_present = false; 1680 amd_iommu_pc_present = false;
1679 return; 1681 return;
1680 } 1682 }
1681 1683
1682 pr_info("AMD-Vi: IOMMU performance counters supported\n"); 1684 pr_info("IOMMU performance counters supported\n");
1683 1685
1684 val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); 1686 val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1685 iommu->max_banks = (u8) ((val >> 12) & 0x3f); 1687 iommu->max_banks = (u8) ((val >> 12) & 0x3f);
@@ -1840,11 +1842,11 @@ static void print_iommu_info(void)
1840 for_each_iommu(iommu) { 1842 for_each_iommu(iommu) {
1841 int i; 1843 int i;
1842 1844
1843 pr_info("AMD-Vi: Found IOMMU at %s cap 0x%hx\n", 1845 pr_info("Found IOMMU at %s cap 0x%hx\n",
1844 dev_name(&iommu->dev->dev), iommu->cap_ptr); 1846 dev_name(&iommu->dev->dev), iommu->cap_ptr);
1845 1847
1846 if (iommu->cap & (1 << IOMMU_CAP_EFR)) { 1848 if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
1847 pr_info("AMD-Vi: Extended features (%#llx):\n", 1849 pr_info("Extended features (%#llx):\n",
1848 iommu->features); 1850 iommu->features);
1849 for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { 1851 for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
1850 if (iommu_feature(iommu, (1ULL << i))) 1852 if (iommu_feature(iommu, (1ULL << i)))
@@ -1858,11 +1860,11 @@ static void print_iommu_info(void)
1858 } 1860 }
1859 } 1861 }
1860 if (irq_remapping_enabled) { 1862 if (irq_remapping_enabled) {
1861 pr_info("AMD-Vi: Interrupt remapping enabled\n"); 1863 pr_info("Interrupt remapping enabled\n");
1862 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 1864 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
1863 pr_info("AMD-Vi: virtual APIC enabled\n"); 1865 pr_info("Virtual APIC enabled\n");
1864 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 1866 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
1865 pr_info("AMD-Vi: X2APIC enabled\n"); 1867 pr_info("X2APIC enabled\n");
1866 } 1868 }
1867} 1869}
1868 1870
@@ -2376,7 +2378,7 @@ static bool __init check_ioapic_information(void)
2376 2378
2377 devid = get_ioapic_devid(id); 2379 devid = get_ioapic_devid(id);
2378 if (devid < 0) { 2380 if (devid < 0) {
2379 pr_err("%sAMD-Vi: IOAPIC[%d] not in IVRS table\n", 2381 pr_err("%s: IOAPIC[%d] not in IVRS table\n",
2380 fw_bug, id); 2382 fw_bug, id);
2381 ret = false; 2383 ret = false;
2382 } else if (devid == IOAPIC_SB_DEVID) { 2384 } else if (devid == IOAPIC_SB_DEVID) {
@@ -2394,11 +2396,11 @@ static bool __init check_ioapic_information(void)
2394 * when the BIOS is buggy and provides us the wrong 2396 * when the BIOS is buggy and provides us the wrong
2395 * device id for the IOAPIC in the system. 2397 * device id for the IOAPIC in the system.
2396 */ 2398 */
2397 pr_err("%sAMD-Vi: No southbridge IOAPIC found\n", fw_bug); 2399 pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
2398 } 2400 }
2399 2401
2400 if (!ret) 2402 if (!ret)
2401 pr_err("AMD-Vi: Disabling interrupt remapping\n"); 2403 pr_err("Disabling interrupt remapping\n");
2402 2404
2403 return ret; 2405 return ret;
2404} 2406}
@@ -2453,7 +2455,7 @@ static int __init early_amd_iommu_init(void)
2453 return -ENODEV; 2455 return -ENODEV;
2454 else if (ACPI_FAILURE(status)) { 2456 else if (ACPI_FAILURE(status)) {
2455 const char *err = acpi_format_exception(status); 2457 const char *err = acpi_format_exception(status);
2456 pr_err("AMD-Vi: IVRS table error: %s\n", err); 2458 pr_err("IVRS table error: %s\n", err);
2457 return -EINVAL; 2459 return -EINVAL;
2458 } 2460 }
2459 2461
@@ -2606,7 +2608,7 @@ static bool detect_ivrs(void)
2606 return false; 2608 return false;
2607 else if (ACPI_FAILURE(status)) { 2609 else if (ACPI_FAILURE(status)) {
2608 const char *err = acpi_format_exception(status); 2610 const char *err = acpi_format_exception(status);
2609 pr_err("AMD-Vi: IVRS table error: %s\n", err); 2611 pr_err("IVRS table error: %s\n", err);
2610 return false; 2612 return false;
2611 } 2613 }
2612 2614
@@ -2641,7 +2643,7 @@ static int __init state_next(void)
2641 ret = early_amd_iommu_init(); 2643 ret = early_amd_iommu_init();
2642 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; 2644 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
2643 if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) { 2645 if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) {
2644 pr_info("AMD-Vi: AMD IOMMU disabled on kernel command-line\n"); 2646 pr_info("AMD IOMMU disabled on kernel command-line\n");
2645 free_dma_resources(); 2647 free_dma_resources();
2646 free_iommu_resources(); 2648 free_iommu_resources();
2647 init_state = IOMMU_CMDLINE_DISABLED; 2649 init_state = IOMMU_CMDLINE_DISABLED;
@@ -2788,7 +2790,7 @@ static bool amd_iommu_sme_check(void)
2788 (boot_cpu_data.microcode <= 0x080011ff)) 2790 (boot_cpu_data.microcode <= 0x080011ff))
2789 return true; 2791 return true;
2790 2792
2791 pr_notice("AMD-Vi: IOMMU not currently supported when SME is active\n"); 2793 pr_notice("IOMMU not currently supported when SME is active\n");
2792 2794
2793 return false; 2795 return false;
2794} 2796}
@@ -2873,12 +2875,12 @@ static int __init parse_ivrs_ioapic(char *str)
2873 ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); 2875 ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
2874 2876
2875 if (ret != 4) { 2877 if (ret != 4) {
2876 pr_err("AMD-Vi: Invalid command line: ivrs_ioapic%s\n", str); 2878 pr_err("Invalid command line: ivrs_ioapic%s\n", str);
2877 return 1; 2879 return 1;
2878 } 2880 }
2879 2881
2880 if (early_ioapic_map_size == EARLY_MAP_SIZE) { 2882 if (early_ioapic_map_size == EARLY_MAP_SIZE) {
2881 pr_err("AMD-Vi: Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", 2883 pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
2882 str); 2884 str);
2883 return 1; 2885 return 1;
2884 } 2886 }
@@ -2903,12 +2905,12 @@ static int __init parse_ivrs_hpet(char *str)
2903 ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); 2905 ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
2904 2906
2905 if (ret != 4) { 2907 if (ret != 4) {
2906 pr_err("AMD-Vi: Invalid command line: ivrs_hpet%s\n", str); 2908 pr_err("Invalid command line: ivrs_hpet%s\n", str);
2907 return 1; 2909 return 1;
2908 } 2910 }
2909 2911
2910 if (early_hpet_map_size == EARLY_MAP_SIZE) { 2912 if (early_hpet_map_size == EARLY_MAP_SIZE) {
2911 pr_err("AMD-Vi: Early HPET map overflow - ignoring ivrs_hpet%s\n", 2913 pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
2912 str); 2914 str);
2913 return 1; 2915 return 1;
2914 } 2916 }
@@ -2933,7 +2935,7 @@ static int __init parse_ivrs_acpihid(char *str)
2933 2935
2934 ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid); 2936 ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid);
2935 if (ret != 4) { 2937 if (ret != 4) {
2936 pr_err("AMD-Vi: Invalid command line: ivrs_acpihid(%s)\n", str); 2938 pr_err("Invalid command line: ivrs_acpihid(%s)\n", str);
2937 return 1; 2939 return 1;
2938 } 2940 }
2939 2941
@@ -2942,7 +2944,7 @@ static int __init parse_ivrs_acpihid(char *str)
2942 uid = p; 2944 uid = p;
2943 2945
2944 if (!hid || !(*hid) || !uid) { 2946 if (!hid || !(*hid) || !uid) {
2945 pr_err("AMD-Vi: Invalid command line: hid or uid\n"); 2947 pr_err("Invalid command line: hid or uid\n");
2946 return 1; 2948 return 1;
2947 } 2949 }
2948 2950
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index e2b342e65a7b..eae0741f72dc 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -269,6 +269,7 @@
269#define PAGE_MODE_4_LEVEL 0x04 269#define PAGE_MODE_4_LEVEL 0x04
270#define PAGE_MODE_5_LEVEL 0x05 270#define PAGE_MODE_5_LEVEL 0x05
271#define PAGE_MODE_6_LEVEL 0x06 271#define PAGE_MODE_6_LEVEL 0x06
272#define PAGE_MODE_7_LEVEL 0x07
272 273
273#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9)) 274#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9))
274#define PM_LEVEL_SIZE(x) (((x) < 6) ? \ 275#define PM_LEVEL_SIZE(x) (((x) < 6) ? \
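
    Note (illustrative, not part of the patch): PAGE_MODE_7_LEVEL is not
    an eighth page-table level; a level value of 7 in a PTE marks a
    large-page mapping, which is why the page-table walkers above treat
    levels 0 and 7 as leaf entries and why alloc_pte() now replaces a
    level-7 entry when it needs a real next-level table. A sketch of how
    the level is read out of an entry, assuming the existing
    PM_PTE_LEVEL() encoding (bits 11:9 of the PTE); the helper below is
    hypothetical and only for illustration:

        /* Mirrors the existing macro in amd_iommu_types.h (assumed encoding). */
        #define PM_PTE_LEVEL(pte)   (((pte) >> 9) & 0x7ULL)

        /* Hypothetical helper: a leaf is a level-0 PTE or a level-7 large page. */
        static inline bool amd_pte_is_leaf(u64 pte)
        {
                return PM_PTE_LEVEL(pte) == 0 ||
                       PM_PTE_LEVEL(pte) == PAGE_MODE_7_LEVEL;
        }
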
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index fd552235bd13..23dae9348ace 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -16,6 +16,8 @@
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
19#define pr_fmt(fmt) "AMD-Vi: " fmt
20
19#include <linux/mmu_notifier.h> 21#include <linux/mmu_notifier.h>
20#include <linux/amd-iommu.h> 22#include <linux/amd-iommu.h>
21#include <linux/mm_types.h> 23#include <linux/mm_types.h>
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 6947ccf26512..0d284029dc73 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -20,7 +20,8 @@
20#include <linux/interrupt.h> 20#include <linux/interrupt.h>
21#include <linux/iommu.h> 21#include <linux/iommu.h>
22#include <linux/iopoll.h> 22#include <linux/iopoll.h>
23#include <linux/module.h> 23#include <linux/init.h>
24#include <linux/moduleparam.h>
24#include <linux/msi.h> 25#include <linux/msi.h>
25#include <linux/of.h> 26#include <linux/of.h>
26#include <linux/of_address.h> 27#include <linux/of_address.h>
@@ -356,6 +357,10 @@
356#define MSI_IOVA_BASE 0x8000000 357#define MSI_IOVA_BASE 0x8000000
357#define MSI_IOVA_LENGTH 0x100000 358#define MSI_IOVA_LENGTH 0x100000
358 359
360/*
361 * not really modular, but the easiest way to keep compat with existing
362 * bootargs behaviour is to continue using module_param_named here.
363 */
359static bool disable_bypass = 1; 364static bool disable_bypass = 1;
360module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO); 365module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
361MODULE_PARM_DESC(disable_bypass, 366MODULE_PARM_DESC(disable_bypass,
@@ -576,7 +581,11 @@ struct arm_smmu_device {
576 581
577 struct arm_smmu_strtab_cfg strtab_cfg; 582 struct arm_smmu_strtab_cfg strtab_cfg;
578 583
579 u32 sync_count; 584 /* Hi16xx adds an extra 32 bits of goodness to its MSI payload */
585 union {
586 u32 sync_count;
587 u64 padding;
588 };
580 589
581 /* IOMMU core code handle */ 590 /* IOMMU core code handle */
582 struct iommu_device iommu; 591 struct iommu_device iommu;
@@ -675,7 +684,13 @@ static void queue_inc_cons(struct arm_smmu_queue *q)
675 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1; 684 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
676 685
677 q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); 686 q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
678 writel(q->cons, q->cons_reg); 687
688 /*
689 * Ensure that all CPU accesses (reads and writes) to the queue
690 * are complete before we update the cons pointer.
691 */
692 mb();
693 writel_relaxed(q->cons, q->cons_reg);
679} 694}
680 695
681static int queue_sync_prod(struct arm_smmu_queue *q) 696static int queue_sync_prod(struct arm_smmu_queue *q)
@@ -828,7 +843,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
828 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); 843 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
829 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH); 844 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
830 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); 845 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
831 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA, ent->sync.msidata); 846 /*
847 * Commands are written little-endian, but we want the SMMU to
848 * receive MSIData, and thus write it back to memory, in CPU
849 * byte order, so big-endian needs an extra byteswap here.
850 */
851 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA,
852 cpu_to_le32(ent->sync.msidata));
832 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK; 853 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
833 break; 854 break;
834 default: 855 default:
@@ -1691,24 +1712,26 @@ static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1691 1712
1692static void arm_smmu_detach_dev(struct device *dev) 1713static void arm_smmu_detach_dev(struct device *dev)
1693{ 1714{
1694 struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv; 1715 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1716 struct arm_smmu_master_data *master = fwspec->iommu_priv;
1695 1717
1696 master->ste.assigned = false; 1718 master->ste.assigned = false;
1697 arm_smmu_install_ste_for_dev(dev->iommu_fwspec); 1719 arm_smmu_install_ste_for_dev(fwspec);
1698} 1720}
1699 1721
1700static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) 1722static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1701{ 1723{
1702 int ret = 0; 1724 int ret = 0;
1725 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1703 struct arm_smmu_device *smmu; 1726 struct arm_smmu_device *smmu;
1704 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 1727 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1705 struct arm_smmu_master_data *master; 1728 struct arm_smmu_master_data *master;
1706 struct arm_smmu_strtab_ent *ste; 1729 struct arm_smmu_strtab_ent *ste;
1707 1730
1708 if (!dev->iommu_fwspec) 1731 if (!fwspec)
1709 return -ENOENT; 1732 return -ENOENT;
1710 1733
1711 master = dev->iommu_fwspec->iommu_priv; 1734 master = fwspec->iommu_priv;
1712 smmu = master->smmu; 1735 smmu = master->smmu;
1713 ste = &master->ste; 1736 ste = &master->ste;
1714 1737
@@ -1748,7 +1771,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1748 ste->s2_cfg = &smmu_domain->s2_cfg; 1771 ste->s2_cfg = &smmu_domain->s2_cfg;
1749 } 1772 }
1750 1773
1751 arm_smmu_install_ste_for_dev(dev->iommu_fwspec); 1774 arm_smmu_install_ste_for_dev(fwspec);
1752out_unlock: 1775out_unlock:
1753 mutex_unlock(&smmu_domain->init_mutex); 1776 mutex_unlock(&smmu_domain->init_mutex);
1754 return ret; 1777 return ret;
@@ -1839,7 +1862,7 @@ static int arm_smmu_add_device(struct device *dev)
1839 int i, ret; 1862 int i, ret;
1840 struct arm_smmu_device *smmu; 1863 struct arm_smmu_device *smmu;
1841 struct arm_smmu_master_data *master; 1864 struct arm_smmu_master_data *master;
1842 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 1865 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1843 struct iommu_group *group; 1866 struct iommu_group *group;
1844 1867
1845 if (!fwspec || fwspec->ops != &arm_smmu_ops) 1868 if (!fwspec || fwspec->ops != &arm_smmu_ops)
@@ -1890,7 +1913,7 @@ static int arm_smmu_add_device(struct device *dev)
1890 1913
1891static void arm_smmu_remove_device(struct device *dev) 1914static void arm_smmu_remove_device(struct device *dev)
1892{ 1915{
1893 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 1916 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1894 struct arm_smmu_master_data *master; 1917 struct arm_smmu_master_data *master;
1895 struct arm_smmu_device *smmu; 1918 struct arm_smmu_device *smmu;
1896 1919
@@ -2928,37 +2951,25 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
2928 return 0; 2951 return 0;
2929} 2952}
2930 2953
2931static int arm_smmu_device_remove(struct platform_device *pdev) 2954static void arm_smmu_device_shutdown(struct platform_device *pdev)
2932{ 2955{
2933 struct arm_smmu_device *smmu = platform_get_drvdata(pdev); 2956 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2934 2957
2935 arm_smmu_device_disable(smmu); 2958 arm_smmu_device_disable(smmu);
2936
2937 return 0;
2938}
2939
2940static void arm_smmu_device_shutdown(struct platform_device *pdev)
2941{
2942 arm_smmu_device_remove(pdev);
2943} 2959}
2944 2960
2945static const struct of_device_id arm_smmu_of_match[] = { 2961static const struct of_device_id arm_smmu_of_match[] = {
2946 { .compatible = "arm,smmu-v3", }, 2962 { .compatible = "arm,smmu-v3", },
2947 { }, 2963 { },
2948}; 2964};
2949MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2950 2965
2951static struct platform_driver arm_smmu_driver = { 2966static struct platform_driver arm_smmu_driver = {
2952 .driver = { 2967 .driver = {
2953 .name = "arm-smmu-v3", 2968 .name = "arm-smmu-v3",
2954 .of_match_table = of_match_ptr(arm_smmu_of_match), 2969 .of_match_table = of_match_ptr(arm_smmu_of_match),
2970 .suppress_bind_attrs = true,
2955 }, 2971 },
2956 .probe = arm_smmu_device_probe, 2972 .probe = arm_smmu_device_probe,
2957 .remove = arm_smmu_device_remove,
2958 .shutdown = arm_smmu_device_shutdown, 2973 .shutdown = arm_smmu_device_shutdown,
2959}; 2974};
2960module_platform_driver(arm_smmu_driver); 2975builtin_platform_driver(arm_smmu_driver);
2961
2962MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2963MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2964MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 5a28ae892504..af18a7e7f917 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -41,13 +41,15 @@
41#include <linux/io-64-nonatomic-hi-lo.h> 41#include <linux/io-64-nonatomic-hi-lo.h>
42#include <linux/iommu.h> 42#include <linux/iommu.h>
43#include <linux/iopoll.h> 43#include <linux/iopoll.h>
44#include <linux/module.h> 44#include <linux/init.h>
45#include <linux/moduleparam.h>
45#include <linux/of.h> 46#include <linux/of.h>
46#include <linux/of_address.h> 47#include <linux/of_address.h>
47#include <linux/of_device.h> 48#include <linux/of_device.h>
48#include <linux/of_iommu.h> 49#include <linux/of_iommu.h>
49#include <linux/pci.h> 50#include <linux/pci.h>
50#include <linux/platform_device.h> 51#include <linux/platform_device.h>
52#include <linux/pm_runtime.h>
51#include <linux/slab.h> 53#include <linux/slab.h>
52#include <linux/spinlock.h> 54#include <linux/spinlock.h>
53 55
@@ -101,6 +103,10 @@
101#define MSI_IOVA_LENGTH 0x100000 103#define MSI_IOVA_LENGTH 0x100000
102 104
103static int force_stage; 105static int force_stage;
106/*
107 * not really modular, but the easiest way to keep compat with existing
108 * bootargs behaviour is to continue using module_param() here.
109 */
104module_param(force_stage, int, S_IRUGO); 110module_param(force_stage, int, S_IRUGO);
105MODULE_PARM_DESC(force_stage, 111MODULE_PARM_DESC(force_stage,
106 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation."); 112 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
@@ -119,6 +125,7 @@ enum arm_smmu_implementation {
119 GENERIC_SMMU, 125 GENERIC_SMMU,
120 ARM_MMU500, 126 ARM_MMU500,
121 CAVIUM_SMMUV2, 127 CAVIUM_SMMUV2,
128 QCOM_SMMUV2,
122}; 129};
123 130
124struct arm_smmu_s2cr { 131struct arm_smmu_s2cr {
@@ -206,6 +213,8 @@ struct arm_smmu_device {
206 u32 num_global_irqs; 213 u32 num_global_irqs;
207 u32 num_context_irqs; 214 u32 num_context_irqs;
208 unsigned int *irqs; 215 unsigned int *irqs;
216 struct clk_bulk_data *clks;
217 int num_clks;
209 218
210 u32 cavium_id_base; /* Specific to Cavium */ 219 u32 cavium_id_base; /* Specific to Cavium */
211 220
@@ -267,6 +276,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
267 { 0, NULL}, 276 { 0, NULL},
268}; 277};
269 278
279static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
280{
281 if (pm_runtime_enabled(smmu->dev))
282 return pm_runtime_get_sync(smmu->dev);
283
284 return 0;
285}
286
287static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
288{
289 if (pm_runtime_enabled(smmu->dev))
290 pm_runtime_put(smmu->dev);
291}
292
270static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) 293static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
271{ 294{
272 return container_of(dom, struct arm_smmu_domain, domain); 295 return container_of(dom, struct arm_smmu_domain, domain);
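These helpers make runtime PM opt-in: pm_runtime_get_sync()/pm_runtime_put() are only called when runtime PM was actually enabled for the SMMU (the probe path below enables it only when the device sits in a power domain), so platforms without one pay nothing extra on the map/unmap fast paths. Every register-touching path then brackets its work with the pair, along the lines of this kernel-style sketch (the function is hypothetical and not compilable outside the tree):

	static int arm_smmu_touch_hw(struct arm_smmu_device *smmu)
	{
		int ret;

		/* Powers up (and clocks) the SMMU when runtime PM is in use,
		 * and is a cheap no-op otherwise. */
		ret = arm_smmu_rpm_get(smmu);
		if (ret < 0)
			return ret;

		/* ... access SMMU registers here ... */

		/* Drop the reference; the SMMU may runtime-suspend again. */
		arm_smmu_rpm_put(smmu);
		return 0;
	}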
@@ -926,11 +949,15 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
926 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 949 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
927 struct arm_smmu_device *smmu = smmu_domain->smmu; 950 struct arm_smmu_device *smmu = smmu_domain->smmu;
928 struct arm_smmu_cfg *cfg = &smmu_domain->cfg; 951 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
929 int irq; 952 int ret, irq;
930 953
931 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) 954 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
932 return; 955 return;
933 956
957 ret = arm_smmu_rpm_get(smmu);
958 if (ret < 0)
959 return;
960
934 /* 961 /*
935 * Disable the context bank and free the page tables before freeing 962 * Disable the context bank and free the page tables before freeing
936 * it. 963 * it.
@@ -945,6 +972,8 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
945 972
946 free_io_pgtable_ops(smmu_domain->pgtbl_ops); 973 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
947 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); 974 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
975
976 arm_smmu_rpm_put(smmu);
948} 977}
949 978
950static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) 979static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
@@ -1103,7 +1132,7 @@ static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1103 1132
1104static int arm_smmu_master_alloc_smes(struct device *dev) 1133static int arm_smmu_master_alloc_smes(struct device *dev)
1105{ 1134{
1106 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 1135 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1107 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv; 1136 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1108 struct arm_smmu_device *smmu = cfg->smmu; 1137 struct arm_smmu_device *smmu = cfg->smmu;
1109 struct arm_smmu_smr *smrs = smmu->smrs; 1138 struct arm_smmu_smr *smrs = smmu->smrs;
@@ -1206,7 +1235,7 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1206static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) 1235static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1207{ 1236{
1208 int ret; 1237 int ret;
1209 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 1238 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1210 struct arm_smmu_device *smmu; 1239 struct arm_smmu_device *smmu;
1211 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 1240 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1212 1241
@@ -1226,10 +1255,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1226 return -ENODEV; 1255 return -ENODEV;
1227 1256
1228 smmu = fwspec_smmu(fwspec); 1257 smmu = fwspec_smmu(fwspec);
1258
1259 ret = arm_smmu_rpm_get(smmu);
1260 if (ret < 0)
1261 return ret;
1262
1229 /* Ensure that the domain is finalised */ 1263 /* Ensure that the domain is finalised */
1230 ret = arm_smmu_init_domain_context(domain, smmu); 1264 ret = arm_smmu_init_domain_context(domain, smmu);
1231 if (ret < 0) 1265 if (ret < 0)
1232 return ret; 1266 goto rpm_put;
1233 1267
1234 /* 1268 /*
1235 * Sanity check the domain. We don't support domains across 1269 * Sanity check the domain. We don't support domains across
@@ -1239,49 +1273,74 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1239 dev_err(dev, 1273 dev_err(dev,
1240 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", 1274 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1241 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); 1275 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1242 return -EINVAL; 1276 ret = -EINVAL;
1277 goto rpm_put;
1243 } 1278 }
1244 1279
1245 /* Looks ok, so add the device to the domain */ 1280 /* Looks ok, so add the device to the domain */
1246 return arm_smmu_domain_add_master(smmu_domain, fwspec); 1281 ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
1282
1283rpm_put:
1284 arm_smmu_rpm_put(smmu);
1285 return ret;
1247} 1286}
1248 1287
1249static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, 1288static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1250 phys_addr_t paddr, size_t size, int prot) 1289 phys_addr_t paddr, size_t size, int prot)
1251{ 1290{
1252 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; 1291 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1292 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1293 int ret;
1253 1294
1254 if (!ops) 1295 if (!ops)
1255 return -ENODEV; 1296 return -ENODEV;
1256 1297
1257 return ops->map(ops, iova, paddr, size, prot); 1298 arm_smmu_rpm_get(smmu);
1299 ret = ops->map(ops, iova, paddr, size, prot);
1300 arm_smmu_rpm_put(smmu);
1301
1302 return ret;
1258} 1303}
1259 1304
1260static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, 1305static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1261 size_t size) 1306 size_t size)
1262{ 1307{
1263 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; 1308 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1309 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1310 size_t ret;
1264 1311
1265 if (!ops) 1312 if (!ops)
1266 return 0; 1313 return 0;
1267 1314
1268 return ops->unmap(ops, iova, size); 1315 arm_smmu_rpm_get(smmu);
1316 ret = ops->unmap(ops, iova, size);
1317 arm_smmu_rpm_put(smmu);
1318
1319 return ret;
1269} 1320}
1270 1321
1271static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) 1322static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1272{ 1323{
1273 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 1324 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1325 struct arm_smmu_device *smmu = smmu_domain->smmu;
1274 1326
1275 if (smmu_domain->tlb_ops) 1327 if (smmu_domain->tlb_ops) {
1328 arm_smmu_rpm_get(smmu);
1276 smmu_domain->tlb_ops->tlb_flush_all(smmu_domain); 1329 smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
1330 arm_smmu_rpm_put(smmu);
1331 }
1277} 1332}
1278 1333
1279static void arm_smmu_iotlb_sync(struct iommu_domain *domain) 1334static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1280{ 1335{
1281 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 1336 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1337 struct arm_smmu_device *smmu = smmu_domain->smmu;
1282 1338
1283 if (smmu_domain->tlb_ops) 1339 if (smmu_domain->tlb_ops) {
1340 arm_smmu_rpm_get(smmu);
1284 smmu_domain->tlb_ops->tlb_sync(smmu_domain); 1341 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1342 arm_smmu_rpm_put(smmu);
1343 }
1285} 1344}
1286 1345
1287static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, 1346static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
@@ -1296,6 +1355,11 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1296 u32 tmp; 1355 u32 tmp;
1297 u64 phys; 1356 u64 phys;
1298 unsigned long va, flags; 1357 unsigned long va, flags;
1358 int ret;
1359
1360 ret = arm_smmu_rpm_get(smmu);
1361 if (ret < 0)
1362 return 0;
1299 1363
1300 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); 1364 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1301 1365
@@ -1324,6 +1388,8 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1324 return 0; 1388 return 0;
1325 } 1389 }
1326 1390
1391 arm_smmu_rpm_put(smmu);
1392
1327 return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff); 1393 return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1328} 1394}
1329 1395
@@ -1380,7 +1446,7 @@ static int arm_smmu_add_device(struct device *dev)
1380{ 1446{
1381 struct arm_smmu_device *smmu; 1447 struct arm_smmu_device *smmu;
1382 struct arm_smmu_master_cfg *cfg; 1448 struct arm_smmu_master_cfg *cfg;
1383 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 1449 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1384 int i, ret; 1450 int i, ret;
1385 1451
1386 if (using_legacy_binding) { 1452 if (using_legacy_binding) {
@@ -1391,7 +1457,7 @@ static int arm_smmu_add_device(struct device *dev)
1391 * will allocate/initialise a new one. Thus we need to update fwspec for 1457 * will allocate/initialise a new one. Thus we need to update fwspec for
1392 * later use. 1458 * later use.
1393 */ 1459 */
1394 fwspec = dev->iommu_fwspec; 1460 fwspec = dev_iommu_fwspec_get(dev);
1395 if (ret) 1461 if (ret)
1396 goto out_free; 1462 goto out_free;
1397 } else if (fwspec && fwspec->ops == &arm_smmu_ops) { 1463 } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
@@ -1428,12 +1494,21 @@ static int arm_smmu_add_device(struct device *dev)
1428 while (i--) 1494 while (i--)
1429 cfg->smendx[i] = INVALID_SMENDX; 1495 cfg->smendx[i] = INVALID_SMENDX;
1430 1496
1497 ret = arm_smmu_rpm_get(smmu);
1498 if (ret < 0)
1499 goto out_cfg_free;
1500
1431 ret = arm_smmu_master_alloc_smes(dev); 1501 ret = arm_smmu_master_alloc_smes(dev);
1502 arm_smmu_rpm_put(smmu);
1503
1432 if (ret) 1504 if (ret)
1433 goto out_cfg_free; 1505 goto out_cfg_free;
1434 1506
1435 iommu_device_link(&smmu->iommu, dev); 1507 iommu_device_link(&smmu->iommu, dev);
1436 1508
1509 device_link_add(dev, smmu->dev,
1510 DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1511
1437 return 0; 1512 return 0;
1438 1513
1439out_cfg_free: 1514out_cfg_free:
@@ -1445,10 +1520,10 @@ out_free:
1445 1520
1446static void arm_smmu_remove_device(struct device *dev) 1521static void arm_smmu_remove_device(struct device *dev)
1447{ 1522{
1448 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 1523 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1449 struct arm_smmu_master_cfg *cfg; 1524 struct arm_smmu_master_cfg *cfg;
1450 struct arm_smmu_device *smmu; 1525 struct arm_smmu_device *smmu;
1451 1526 int ret;
1452 1527
1453 if (!fwspec || fwspec->ops != &arm_smmu_ops) 1528 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1454 return; 1529 return;
@@ -1456,8 +1531,15 @@ static void arm_smmu_remove_device(struct device *dev)
1456 cfg = fwspec->iommu_priv; 1531 cfg = fwspec->iommu_priv;
1457 smmu = cfg->smmu; 1532 smmu = cfg->smmu;
1458 1533
1534 ret = arm_smmu_rpm_get(smmu);
1535 if (ret < 0)
1536 return;
1537
1459 iommu_device_unlink(&smmu->iommu, dev); 1538 iommu_device_unlink(&smmu->iommu, dev);
1460 arm_smmu_master_free_smes(fwspec); 1539 arm_smmu_master_free_smes(fwspec);
1540
1541 arm_smmu_rpm_put(smmu);
1542
1461 iommu_group_remove_device(dev); 1543 iommu_group_remove_device(dev);
1462 kfree(fwspec->iommu_priv); 1544 kfree(fwspec->iommu_priv);
1463 iommu_fwspec_free(dev); 1545 iommu_fwspec_free(dev);
@@ -1465,7 +1547,7 @@ static void arm_smmu_remove_device(struct device *dev)
1465 1547
1466static struct iommu_group *arm_smmu_device_group(struct device *dev) 1548static struct iommu_group *arm_smmu_device_group(struct device *dev)
1467{ 1549{
1468 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 1550 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1469 struct arm_smmu_device *smmu = fwspec_smmu(fwspec); 1551 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1470 struct iommu_group *group = NULL; 1552 struct iommu_group *group = NULL;
1471 int i, idx; 1553 int i, idx;
@@ -1947,13 +2029,14 @@ struct arm_smmu_match_data {
1947}; 2029};
1948 2030
1949#define ARM_SMMU_MATCH_DATA(name, ver, imp) \ 2031#define ARM_SMMU_MATCH_DATA(name, ver, imp) \
1950static struct arm_smmu_match_data name = { .version = ver, .model = imp } 2032static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1951 2033
1952ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU); 2034ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1953ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU); 2035ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1954ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU); 2036ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1955ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); 2037ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1956ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); 2038ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
2039ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1957 2040
1958static const struct of_device_id arm_smmu_of_match[] = { 2041static const struct of_device_id arm_smmu_of_match[] = {
1959 { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 }, 2042 { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
@@ -1962,9 +2045,9 @@ static const struct of_device_id arm_smmu_of_match[] = {
1962 { .compatible = "arm,mmu-401", .data = &arm_mmu401 }, 2045 { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1963 { .compatible = "arm,mmu-500", .data = &arm_mmu500 }, 2046 { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1964 { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 }, 2047 { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
2048 { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1965 { }, 2049 { },
1966}; 2050};
1967MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1968 2051
1969#ifdef CONFIG_ACPI 2052#ifdef CONFIG_ACPI
1970static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) 2053static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
@@ -2150,6 +2233,17 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
2150 smmu->irqs[i] = irq; 2233 smmu->irqs[i] = irq;
2151 } 2234 }
2152 2235
2236 err = devm_clk_bulk_get_all(dev, &smmu->clks);
2237 if (err < 0) {
2238 dev_err(dev, "failed to get clocks %d\n", err);
2239 return err;
2240 }
2241 smmu->num_clks = err;
2242
2243 err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2244 if (err)
2245 return err;
2246
2153 err = arm_smmu_device_cfg_probe(smmu); 2247 err = arm_smmu_device_cfg_probe(smmu);
2154 if (err) 2248 if (err)
2155 return err; 2249 return err;
@@ -2200,6 +2294,17 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
2200 arm_smmu_test_smr_masks(smmu); 2294 arm_smmu_test_smr_masks(smmu);
2201 2295
2202 /* 2296 /*
2297 * We want to avoid touching dev->power.lock in fastpaths unless
2298 * it's really going to do something useful - pm_runtime_enabled()
2299 * can serve as an ideal proxy for that decision. So, conditionally
2300 * enable pm_runtime.
2301 */
2302 if (dev->pm_domain) {
2303 pm_runtime_set_active(dev);
2304 pm_runtime_enable(dev);
2305 }
2306
2307 /*
2203 * For ACPI and generic DT bindings, an SMMU will be probed before 2308 * For ACPI and generic DT bindings, an SMMU will be probed before
2204 * any device which might need it, so we want the bus ops in place 2309 * any device which might need it, so we want the bus ops in place
2205 * ready to handle default domain setup as soon as any SMMU exists. 2310 * ready to handle default domain setup as soon as any SMMU exists.
@@ -2224,48 +2329,82 @@ static int arm_smmu_legacy_bus_init(void)
2224} 2329}
2225device_initcall_sync(arm_smmu_legacy_bus_init); 2330device_initcall_sync(arm_smmu_legacy_bus_init);
2226 2331
2227static int arm_smmu_device_remove(struct platform_device *pdev) 2332static void arm_smmu_device_shutdown(struct platform_device *pdev)
2228{ 2333{
2229 struct arm_smmu_device *smmu = platform_get_drvdata(pdev); 2334 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2230 2335
2231 if (!smmu) 2336 if (!smmu)
2232 return -ENODEV; 2337 return;
2233 2338
2234 if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) 2339 if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2235 dev_err(&pdev->dev, "removing device with active domains!\n"); 2340 dev_err(&pdev->dev, "removing device with active domains!\n");
2236 2341
2342 arm_smmu_rpm_get(smmu);
2237 /* Turn the thing off */ 2343 /* Turn the thing off */
2238 writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); 2344 writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2239 return 0; 2345 arm_smmu_rpm_put(smmu);
2346
2347 if (pm_runtime_enabled(smmu->dev))
2348 pm_runtime_force_suspend(smmu->dev);
2349 else
2350 clk_bulk_disable(smmu->num_clks, smmu->clks);
2351
2352 clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2240} 2353}
2241 2354
2242static void arm_smmu_device_shutdown(struct platform_device *pdev) 2355static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2243{ 2356{
2244 arm_smmu_device_remove(pdev); 2357 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2358 int ret;
2359
2360 ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2361 if (ret)
2362 return ret;
2363
2364 arm_smmu_device_reset(smmu);
2365
2366 return 0;
2245} 2367}
2246 2368
2247static int __maybe_unused arm_smmu_pm_resume(struct device *dev) 2369static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2248{ 2370{
2249 struct arm_smmu_device *smmu = dev_get_drvdata(dev); 2371 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2250 2372
2251 arm_smmu_device_reset(smmu); 2373 clk_bulk_disable(smmu->num_clks, smmu->clks);
2374
2252 return 0; 2375 return 0;
2253} 2376}
2254 2377
2255static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume); 2378static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2379{
2380 if (pm_runtime_suspended(dev))
2381 return 0;
2382
2383 return arm_smmu_runtime_resume(dev);
2384}
2385
2386static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2387{
2388 if (pm_runtime_suspended(dev))
2389 return 0;
2390
2391 return arm_smmu_runtime_suspend(dev);
2392}
2393
2394static const struct dev_pm_ops arm_smmu_pm_ops = {
2395 SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2396 SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2397 arm_smmu_runtime_resume, NULL)
2398};
2256 2399
2257static struct platform_driver arm_smmu_driver = { 2400static struct platform_driver arm_smmu_driver = {
2258 .driver = { 2401 .driver = {
2259 .name = "arm-smmu", 2402 .name = "arm-smmu",
2260 .of_match_table = of_match_ptr(arm_smmu_of_match), 2403 .of_match_table = of_match_ptr(arm_smmu_of_match),
2261 .pm = &arm_smmu_pm_ops, 2404 .pm = &arm_smmu_pm_ops,
2405 .suppress_bind_attrs = true,
2262 }, 2406 },
2263 .probe = arm_smmu_device_probe, 2407 .probe = arm_smmu_device_probe,
2264 .remove = arm_smmu_device_remove,
2265 .shutdown = arm_smmu_device_shutdown, 2408 .shutdown = arm_smmu_device_shutdown,
2266}; 2409};
2267module_platform_driver(arm_smmu_driver); 2410builtin_platform_driver(arm_smmu_driver);
2268
2269MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2270MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2271MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 60c7e9e9901e..d19f3d6b43c1 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -175,7 +175,7 @@ EXPORT_SYMBOL(iommu_put_dma_cookie);
175void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) 175void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
176{ 176{
177 177
178 if (!is_of_node(dev->iommu_fwspec->iommu_fwnode)) 178 if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
179 iort_iommu_msi_get_resv_regions(dev, list); 179 iort_iommu_msi_get_resv_regions(dev, list);
180 180
181} 181}
@@ -447,20 +447,17 @@ static void __iommu_dma_free_pages(struct page **pages, int count)
447 kvfree(pages); 447 kvfree(pages);
448} 448}
449 449
450static struct page **__iommu_dma_alloc_pages(unsigned int count, 450static struct page **__iommu_dma_alloc_pages(struct device *dev,
451 unsigned long order_mask, gfp_t gfp) 451 unsigned int count, unsigned long order_mask, gfp_t gfp)
452{ 452{
453 struct page **pages; 453 struct page **pages;
454 unsigned int i = 0, array_size = count * sizeof(*pages); 454 unsigned int i = 0, nid = dev_to_node(dev);
455 455
456 order_mask &= (2U << MAX_ORDER) - 1; 456 order_mask &= (2U << MAX_ORDER) - 1;
457 if (!order_mask) 457 if (!order_mask)
458 return NULL; 458 return NULL;
459 459
460 if (array_size <= PAGE_SIZE) 460 pages = kvzalloc(count * sizeof(*pages), GFP_KERNEL);
461 pages = kzalloc(array_size, GFP_KERNEL);
462 else
463 pages = vzalloc(array_size);
464 if (!pages) 461 if (!pages)
465 return NULL; 462 return NULL;
466 463
@@ -479,10 +476,12 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count,
479 for (order_mask &= (2U << __fls(count)) - 1; 476 for (order_mask &= (2U << __fls(count)) - 1;
480 order_mask; order_mask &= ~order_size) { 477 order_mask; order_mask &= ~order_size) {
481 unsigned int order = __fls(order_mask); 478 unsigned int order = __fls(order_mask);
479 gfp_t alloc_flags = gfp;
482 480
483 order_size = 1U << order; 481 order_size = 1U << order;
484 page = alloc_pages((order_mask - order_size) ? 482 if (order_mask > order_size)
485 gfp | __GFP_NORETRY : gfp, order); 483 alloc_flags |= __GFP_NORETRY;
484 page = alloc_pages_node(nid, alloc_flags, order);
486 if (!page) 485 if (!page)
487 continue; 486 continue;
488 if (!order) 487 if (!order)
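Beyond switching to kvzalloc() for the page array and alloc_pages_node() so the pages come from the device's NUMA node, the allocation strategy is unchanged: carve the buffer out of the largest block size the caller's order_mask permits, fall back to smaller orders on failure, and only pass __GFP_NORETRY when a smaller order remains available as a fallback. The order-selection loop is plain bit arithmetic; a standalone sketch of just that part (allocation success is simulated):

	#include <stdio.h>

	/* Highest set bit of a non-zero value, like the kernel's __fls(). */
	static unsigned int fls_u(unsigned int x)
	{
		return 31 - __builtin_clz(x);
	}

	int main(void)
	{
		unsigned int count = 19;	/* pages still needed            */
		unsigned int order_mask = 0x15;	/* orders 0, 2 and 4 are allowed */

		while (count) {
			unsigned int order_size = 0;

			/* Only consider orders no bigger than what is left to allocate. */
			for (order_mask &= (2U << fls_u(count)) - 1;
			     order_mask; order_mask &= ~order_size) {
				unsigned int order = fls_u(order_mask);

				order_size = 1U << order;
				printf("alloc order %u (%u pages)%s\n", order, order_size,
				       order_mask > order_size ? ", __GFP_NORETRY" : "");
				count -= order_size;	/* pretend the allocation succeeded */
				break;
			}
		}
		return 0;
	}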
@@ -567,7 +566,8 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
567 alloc_sizes = min_size; 566 alloc_sizes = min_size;
568 567
569 count = PAGE_ALIGN(size) >> PAGE_SHIFT; 568 count = PAGE_ALIGN(size) >> PAGE_SHIFT;
570 pages = __iommu_dma_alloc_pages(count, alloc_sizes >> PAGE_SHIFT, gfp); 569 pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
570 gfp);
571 if (!pages) 571 if (!pages)
572 return NULL; 572 return NULL;
573 573
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 1edf2a251336..dc9f14811e0f 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1160,6 +1160,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
1160 int head, tail; 1160 int head, tail;
1161 struct q_inval *qi = iommu->qi; 1161 struct q_inval *qi = iommu->qi;
1162 int wait_index = (index + 1) % QI_LENGTH; 1162 int wait_index = (index + 1) % QI_LENGTH;
1163 int shift = qi_shift(iommu);
1163 1164
1164 if (qi->desc_status[wait_index] == QI_ABORT) 1165 if (qi->desc_status[wait_index] == QI_ABORT)
1165 return -EAGAIN; 1166 return -EAGAIN;
@@ -1173,13 +1174,19 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
1173 */ 1174 */
1174 if (fault & DMA_FSTS_IQE) { 1175 if (fault & DMA_FSTS_IQE) {
1175 head = readl(iommu->reg + DMAR_IQH_REG); 1176 head = readl(iommu->reg + DMAR_IQH_REG);
1176 if ((head >> DMAR_IQ_SHIFT) == index) { 1177 if ((head >> shift) == index) {
1177 pr_err("VT-d detected invalid descriptor: " 1178 struct qi_desc *desc = qi->desc + head;
1178 "low=%llx, high=%llx\n", 1179
1179 (unsigned long long)qi->desc[index].low, 1180 /*
1180 (unsigned long long)qi->desc[index].high); 1181 * desc->qw2 and desc->qw3 are either reserved or
1181 memcpy(&qi->desc[index], &qi->desc[wait_index], 1182 * used by software as private data. We won't print
1182 sizeof(struct qi_desc)); 1183 * out these two qw's for security consideration.
1184 */
1185 pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
1186 (unsigned long long)desc->qw0,
1187 (unsigned long long)desc->qw1);
1188 memcpy(desc, qi->desc + (wait_index << shift),
1189 1 << shift);
1183 writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG); 1190 writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1184 return -EINVAL; 1191 return -EINVAL;
1185 } 1192 }
@@ -1191,10 +1198,10 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
1191 */ 1198 */
1192 if (fault & DMA_FSTS_ITE) { 1199 if (fault & DMA_FSTS_ITE) {
1193 head = readl(iommu->reg + DMAR_IQH_REG); 1200 head = readl(iommu->reg + DMAR_IQH_REG);
1194 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; 1201 head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1195 head |= 1; 1202 head |= 1;
1196 tail = readl(iommu->reg + DMAR_IQT_REG); 1203 tail = readl(iommu->reg + DMAR_IQT_REG);
1197 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; 1204 tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1198 1205
1199 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); 1206 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1200 1207
@@ -1222,15 +1229,14 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
1222{ 1229{
1223 int rc; 1230 int rc;
1224 struct q_inval *qi = iommu->qi; 1231 struct q_inval *qi = iommu->qi;
1225 struct qi_desc *hw, wait_desc; 1232 int offset, shift, length;
1233 struct qi_desc wait_desc;
1226 int wait_index, index; 1234 int wait_index, index;
1227 unsigned long flags; 1235 unsigned long flags;
1228 1236
1229 if (!qi) 1237 if (!qi)
1230 return 0; 1238 return 0;
1231 1239
1232 hw = qi->desc;
1233
1234restart: 1240restart:
1235 rc = 0; 1241 rc = 0;
1236 1242
@@ -1243,16 +1249,21 @@ restart:
1243 1249
1244 index = qi->free_head; 1250 index = qi->free_head;
1245 wait_index = (index + 1) % QI_LENGTH; 1251 wait_index = (index + 1) % QI_LENGTH;
1252 shift = qi_shift(iommu);
1253 length = 1 << shift;
1246 1254
1247 qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE; 1255 qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
1248 1256
1249 hw[index] = *desc; 1257 offset = index << shift;
1250 1258 memcpy(qi->desc + offset, desc, length);
1251 wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) | 1259 wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1252 QI_IWD_STATUS_WRITE | QI_IWD_TYPE; 1260 QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1253 wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]); 1261 wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1262 wait_desc.qw2 = 0;
1263 wait_desc.qw3 = 0;
1254 1264
1255 hw[wait_index] = wait_desc; 1265 offset = wait_index << shift;
1266 memcpy(qi->desc + offset, &wait_desc, length);
1256 1267
1257 qi->free_head = (qi->free_head + 2) % QI_LENGTH; 1268 qi->free_head = (qi->free_head + 2) % QI_LENGTH;
1258 qi->free_cnt -= 2; 1269 qi->free_cnt -= 2;
@@ -1261,7 +1272,7 @@ restart:
1261 * update the HW tail register indicating the presence of 1272 * update the HW tail register indicating the presence of
1262 * new descriptors. 1273 * new descriptors.
1263 */ 1274 */
1264 writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG); 1275 writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1265 1276
1266 while (qi->desc_status[wait_index] != QI_DONE) { 1277 while (qi->desc_status[wait_index] != QI_DONE) {
1267 /* 1278 /*
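Scalable mode doubles the invalidation descriptor from 128 to 256 bits, so the queue can no longer be indexed as a plain array of struct qi_desc. qi_shift() (4 for 128-bit, 5 for 256-bit descriptors) now does the translation everywhere: index << shift is the byte offset into qi->desc, 1 << shift is the memcpy length, and the same shift converts the hardware head/tail register contents back into indices. A quick standalone check of that arithmetic with the driver's QI_LENGTH of 256 entries:

	#include <assert.h>
	#include <stdio.h>

	#define QI_LENGTH 256	/* descriptors per queue, as in the driver */

	int main(void)
	{
		for (unsigned int shift = 4; shift <= 5; shift++) {
			unsigned int index = 3;
			unsigned int offset = index << shift;	/* byte offset into qi->desc   */
			unsigned int length = 1u << shift;	/* bytes copied per descriptor */

			/* head >> shift recovers the index the hardware reports,
			 * and the tail register is written as free_head << shift */
			assert(offset >> shift == index);

			printf("%3u-bit descriptors: entry %u at byte %u, queue is %u bytes\n",
			       8 * length, index, offset, QI_LENGTH * length);
		}
		return 0;
	}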
@@ -1298,8 +1309,10 @@ void qi_global_iec(struct intel_iommu *iommu)
1298{ 1309{
1299 struct qi_desc desc; 1310 struct qi_desc desc;
1300 1311
1301 desc.low = QI_IEC_TYPE; 1312 desc.qw0 = QI_IEC_TYPE;
1302 desc.high = 0; 1313 desc.qw1 = 0;
1314 desc.qw2 = 0;
1315 desc.qw3 = 0;
1303 1316
1304 /* should never fail */ 1317 /* should never fail */
1305 qi_submit_sync(&desc, iommu); 1318 qi_submit_sync(&desc, iommu);
@@ -1310,9 +1323,11 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1310{ 1323{
1311 struct qi_desc desc; 1324 struct qi_desc desc;
1312 1325
1313 desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) 1326 desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1314 | QI_CC_GRAN(type) | QI_CC_TYPE; 1327 | QI_CC_GRAN(type) | QI_CC_TYPE;
1315 desc.high = 0; 1328 desc.qw1 = 0;
1329 desc.qw2 = 0;
1330 desc.qw3 = 0;
1316 1331
1317 qi_submit_sync(&desc, iommu); 1332 qi_submit_sync(&desc, iommu);
1318} 1333}
@@ -1331,10 +1346,12 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1331 if (cap_read_drain(iommu->cap)) 1346 if (cap_read_drain(iommu->cap))
1332 dr = 1; 1347 dr = 1;
1333 1348
1334 desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) 1349 desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1335 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; 1350 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1336 desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) 1351 desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1337 | QI_IOTLB_AM(size_order); 1352 | QI_IOTLB_AM(size_order);
1353 desc.qw2 = 0;
1354 desc.qw3 = 0;
1338 1355
1339 qi_submit_sync(&desc, iommu); 1356 qi_submit_sync(&desc, iommu);
1340} 1357}
@@ -1347,15 +1364,17 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1347 if (mask) { 1364 if (mask) {
1348 WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1)); 1365 WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1));
1349 addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; 1366 addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1350 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; 1367 desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1351 } else 1368 } else
1352 desc.high = QI_DEV_IOTLB_ADDR(addr); 1369 desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1353 1370
1354 if (qdep >= QI_DEV_IOTLB_MAX_INVS) 1371 if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1355 qdep = 0; 1372 qdep = 0;
1356 1373
1357 desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | 1374 desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1358 QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); 1375 QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1376 desc.qw2 = 0;
1377 desc.qw3 = 0;
1359 1378
1360 qi_submit_sync(&desc, iommu); 1379 qi_submit_sync(&desc, iommu);
1361} 1380}
@@ -1403,16 +1422,24 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
1403 u32 sts; 1422 u32 sts;
1404 unsigned long flags; 1423 unsigned long flags;
1405 struct q_inval *qi = iommu->qi; 1424 struct q_inval *qi = iommu->qi;
1425 u64 val = virt_to_phys(qi->desc);
1406 1426
1407 qi->free_head = qi->free_tail = 0; 1427 qi->free_head = qi->free_tail = 0;
1408 qi->free_cnt = QI_LENGTH; 1428 qi->free_cnt = QI_LENGTH;
1409 1429
1430 /*
1431 * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1432 * is present.
1433 */
1434 if (ecap_smts(iommu->ecap))
1435 val |= (1 << 11) | 1;
1436
1410 raw_spin_lock_irqsave(&iommu->register_lock, flags); 1437 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1411 1438
1412 /* write zero to the tail reg */ 1439 /* write zero to the tail reg */
1413 writel(0, iommu->reg + DMAR_IQT_REG); 1440 writel(0, iommu->reg + DMAR_IQT_REG);
1414 1441
1415 dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); 1442 dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1416 1443
1417 iommu->gcmd |= DMA_GCMD_QIE; 1444 iommu->gcmd |= DMA_GCMD_QIE;
1418 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 1445 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
@@ -1448,8 +1475,12 @@ int dmar_enable_qi(struct intel_iommu *iommu)
1448 1475
1449 qi = iommu->qi; 1476 qi = iommu->qi;
1450 1477
1451 1478 /*
1452 desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0); 1479 * Need two pages to accommodate 256 descriptors of 256 bits each
1480 * if the remapping hardware supports scalable mode translation.
1481 */
1482 desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1483 !!ecap_smts(iommu->ecap));
1453 if (!desc_page) { 1484 if (!desc_page) {
1454 kfree(qi); 1485 kfree(qi);
1455 iommu->qi = NULL; 1486 iommu->qi = NULL;
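The two hunks above follow directly from the wider descriptors: the queue allocation uses order !!ecap_smts(), i.e. one 4 KiB page for 256 x 16-byte legacy descriptors and two pages for 256 x 32-byte scalable-mode descriptors, and __dmar_enable_qi() describes that layout to the hardware through IQA_REG, where bit 11 (DW) selects 256-bit descriptors and the low QS bits encode the queue size as 2^QS 4 KiB pages (QS=1 matching the order-1 allocation). A small sketch of how the register value is assembled (field positions taken from the comment and code above; the base address is a hypothetical page-aligned physical address):

	#include <stdint.h>
	#include <stdio.h>

	#define DMAR_IQA_DW	(1ULL << 11)		/* 1: 256-bit (scalable mode) descriptors */
	#define DMAR_IQA_QS(x)	((uint64_t)(x) & 0x7)	/* queue spans 2^x 4 KiB pages            */

	int main(void)
	{
		uint64_t queue_phys = 0x12345000ULL;	/* hypothetical descriptor queue base */
		int scalable_mode = 1;
		uint64_t val = queue_phys;

		if (scalable_mode)
			val |= DMAR_IQA_DW | DMAR_IQA_QS(1);	/* 256-bit entries in an 8 KiB queue */

		printf("IQA_REG = %#llx: %s descriptors, %llu 4 KiB page(s)\n",
		       (unsigned long long)val,
		       (val & DMAR_IQA_DW) ? "256-bit" : "128-bit",
		       1ULL << (val & 0x7));
		return 0;
	}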
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 63b6ce78492a..2bd9ac285c0d 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -292,49 +292,6 @@ static inline void context_clear_entry(struct context_entry *context)
292} 292}
293 293
294/* 294/*
295 * 0: readable
296 * 1: writable
297 * 2-6: reserved
298 * 7: super page
299 * 8-10: available
300 * 11: snoop behavior
301 * 12-63: Host physcial address
302 */
303struct dma_pte {
304 u64 val;
305};
306
307static inline void dma_clear_pte(struct dma_pte *pte)
308{
309 pte->val = 0;
310}
311
312static inline u64 dma_pte_addr(struct dma_pte *pte)
313{
314#ifdef CONFIG_64BIT
315 return pte->val & VTD_PAGE_MASK;
316#else
317 /* Must have a full atomic 64-bit read */
318 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
319#endif
320}
321
322static inline bool dma_pte_present(struct dma_pte *pte)
323{
324 return (pte->val & 3) != 0;
325}
326
327static inline bool dma_pte_superpage(struct dma_pte *pte)
328{
329 return (pte->val & DMA_PTE_LARGE_PAGE);
330}
331
332static inline int first_pte_in_page(struct dma_pte *pte)
333{
334 return !((unsigned long)pte & ~VTD_PAGE_MASK);
335}
336
337/*
338 * This domain is a statically identity mapping domain. 295 * This domain is a statically identity mapping domain.
339 * 1. This domain creats a static 1:1 mapping to all usable memory. 296 * 1. This domain creats a static 1:1 mapping to all usable memory.
340 * 2. It maps to each iommu if successful. 297 * 2. It maps to each iommu if successful.
@@ -406,38 +363,16 @@ static int dmar_map_gfx = 1;
406static int dmar_forcedac; 363static int dmar_forcedac;
407static int intel_iommu_strict; 364static int intel_iommu_strict;
408static int intel_iommu_superpage = 1; 365static int intel_iommu_superpage = 1;
409static int intel_iommu_ecs = 1; 366static int intel_iommu_sm = 1;
410static int intel_iommu_pasid28;
411static int iommu_identity_mapping; 367static int iommu_identity_mapping;
412 368
413#define IDENTMAP_ALL 1 369#define IDENTMAP_ALL 1
414#define IDENTMAP_GFX 2 370#define IDENTMAP_GFX 2
415#define IDENTMAP_AZALIA 4 371#define IDENTMAP_AZALIA 4
416 372
417/* Broadwell and Skylake have broken ECS support — normal so-called "second 373#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap))
418 * level" translation of DMA requests-without-PASID doesn't actually happen 374#define pasid_supported(iommu) (sm_supported(iommu) && \
419 * unless you also set the NESTE bit in an extended context-entry. Which of 375 ecap_pasid((iommu)->ecap))
420 * course means that SVM doesn't work because it's trying to do nested
421 * translation of the physical addresses it finds in the process page tables,
422 * through the IOVA->phys mapping found in the "second level" page tables.
423 *
424 * The VT-d specification was retroactively changed to change the definition
425 * of the capability bits and pretend that Broadwell/Skylake never happened...
426 * but unfortunately the wrong bit was changed. It's ECS which is broken, but
427 * for some reason it was the PASID capability bit which was redefined (from
428 * bit 28 on BDW/SKL to bit 40 in future).
429 *
430 * So our test for ECS needs to eschew those implementations which set the old
431 * PASID capabiity bit 28, since those are the ones on which ECS is broken.
432 * Unless we are working around the 'pasid28' limitations, that is, by putting
433 * the device into passthrough mode for normal DMA and thus masking the bug.
434 */
435#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
436 (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
437/* PASID support is thus enabled if ECS is enabled and *either* of the old
438 * or new capability bits are set. */
439#define pasid_enabled(iommu) (ecs_enabled(iommu) && \
440 (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
441 376
442int intel_iommu_gfx_mapped; 377int intel_iommu_gfx_mapped;
443EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); 378EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
@@ -448,21 +383,24 @@ static LIST_HEAD(device_domain_list);
448 383
449/* 384/*
450 * Iterate over elements in device_domain_list and call the specified 385 * Iterate over elements in device_domain_list and call the specified
451 * callback @fn against each element. This helper should only be used 386 * callback @fn against each element.
452 * in the context where the device_domain_lock has already been holden.
453 */ 387 */
454int for_each_device_domain(int (*fn)(struct device_domain_info *info, 388int for_each_device_domain(int (*fn)(struct device_domain_info *info,
455 void *data), void *data) 389 void *data), void *data)
456{ 390{
457 int ret = 0; 391 int ret = 0;
392 unsigned long flags;
458 struct device_domain_info *info; 393 struct device_domain_info *info;
459 394
460 assert_spin_locked(&device_domain_lock); 395 spin_lock_irqsave(&device_domain_lock, flags);
461 list_for_each_entry(info, &device_domain_list, global) { 396 list_for_each_entry(info, &device_domain_list, global) {
462 ret = fn(info, data); 397 ret = fn(info, data);
463 if (ret) 398 if (ret) {
399 spin_unlock_irqrestore(&device_domain_lock, flags);
464 return ret; 400 return ret;
401 }
465 } 402 }
403 spin_unlock_irqrestore(&device_domain_lock, flags);
466 404
467 return 0; 405 return 0;
468} 406}
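Taking device_domain_lock inside the iterator simplifies the callers: they no longer need to hold the lock themselves, they just pass a callback that is invoked for every struct device_domain_info and can stop the walk by returning a non-zero value, which is then propagated back. A hedged usage sketch (the helper name and matching criterion are made up for illustration; kernel-style, not compilable outside the tree):

	static int match_bus_devfn(struct device_domain_info *info, void *data)
	{
		struct device_domain_info *key = data;

		/* A non-zero return value stops the walk and is passed back. */
		return info->bus == key->bus && info->devfn == key->devfn;
	}

	static bool intel_dev_info_exists(u8 bus, u8 devfn)
	{
		struct device_domain_info key = { .bus = bus, .devfn = devfn };

		/* No external locking required: the iterator now takes
		 * device_domain_lock internally. */
		return for_each_device_domain(match_bus_devfn, &key);
	}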
@@ -518,15 +456,9 @@ static int __init intel_iommu_setup(char *str)
518 } else if (!strncmp(str, "sp_off", 6)) { 456 } else if (!strncmp(str, "sp_off", 6)) {
519 pr_info("Disable supported super page\n"); 457 pr_info("Disable supported super page\n");
520 intel_iommu_superpage = 0; 458 intel_iommu_superpage = 0;
521 } else if (!strncmp(str, "ecs_off", 7)) { 459 } else if (!strncmp(str, "sm_off", 6)) {
522 printk(KERN_INFO 460 pr_info("Intel-IOMMU: disable scalable mode support\n");
523 "Intel-IOMMU: disable extended context table support\n"); 461 intel_iommu_sm = 0;
524 intel_iommu_ecs = 0;
525 } else if (!strncmp(str, "pasid28", 7)) {
526 printk(KERN_INFO
527 "Intel-IOMMU: enable pre-production PASID support\n");
528 intel_iommu_pasid28 = 1;
529 iommu_identity_mapping |= IDENTMAP_GFX;
530 } else if (!strncmp(str, "tboot_noforce", 13)) { 462 } else if (!strncmp(str, "tboot_noforce", 13)) {
531 printk(KERN_INFO 463 printk(KERN_INFO
532 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); 464 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
@@ -773,7 +705,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
773 u64 *entry; 705 u64 *entry;
774 706
775 entry = &root->lo; 707 entry = &root->lo;
776 if (ecs_enabled(iommu)) { 708 if (sm_supported(iommu)) {
777 if (devfn >= 0x80) { 709 if (devfn >= 0x80) {
778 devfn -= 0x80; 710 devfn -= 0x80;
779 entry = &root->hi; 711 entry = &root->hi;
@@ -915,7 +847,7 @@ static void free_context_table(struct intel_iommu *iommu)
915 if (context) 847 if (context)
916 free_pgtable_page(context); 848 free_pgtable_page(context);
917 849
918 if (!ecs_enabled(iommu)) 850 if (!sm_supported(iommu))
919 continue; 851 continue;
920 852
921 context = iommu_context_addr(iommu, i, 0x80, 0); 853 context = iommu_context_addr(iommu, i, 0x80, 0);
@@ -1267,8 +1199,8 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
1267 unsigned long flag; 1199 unsigned long flag;
1268 1200
1269 addr = virt_to_phys(iommu->root_entry); 1201 addr = virt_to_phys(iommu->root_entry);
1270 if (ecs_enabled(iommu)) 1202 if (sm_supported(iommu))
1271 addr |= DMA_RTADDR_RTT; 1203 addr |= DMA_RTADDR_SMT;
1272 1204
1273 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1205 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1274 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr); 1206 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
@@ -1282,7 +1214,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
1282 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1214 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1283} 1215}
1284 1216
1285static void iommu_flush_write_buffer(struct intel_iommu *iommu) 1217void iommu_flush_write_buffer(struct intel_iommu *iommu)
1286{ 1218{
1287 u32 val; 1219 u32 val;
1288 unsigned long flag; 1220 unsigned long flag;
@@ -1694,6 +1626,16 @@ static int iommu_init_domains(struct intel_iommu *iommu)
1694 */ 1626 */
1695 set_bit(0, iommu->domain_ids); 1627 set_bit(0, iommu->domain_ids);
1696 1628
1629 /*
1630 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1631 * entry for first-level or pass-through translation modes should
1632 * be programmed with a domain id different from those used for
1633 * second-level or nested translation. We reserve a domain id for
1634 * this purpose.
1635 */
1636 if (sm_supported(iommu))
1637 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1638
1697 return 0; 1639 return 0;
1698} 1640}
1699 1641
@@ -1758,10 +1700,9 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
1758 free_context_table(iommu); 1700 free_context_table(iommu);
1759 1701
1760#ifdef CONFIG_INTEL_IOMMU_SVM 1702#ifdef CONFIG_INTEL_IOMMU_SVM
1761 if (pasid_enabled(iommu)) { 1703 if (pasid_supported(iommu)) {
1762 if (ecap_prs(iommu->ecap)) 1704 if (ecap_prs(iommu->ecap))
1763 intel_svm_finish_prq(iommu); 1705 intel_svm_finish_prq(iommu);
1764 intel_svm_exit(iommu);
1765 } 1706 }
1766#endif 1707#endif
1767} 1708}
@@ -1981,8 +1922,59 @@ static void domain_exit(struct dmar_domain *domain)
1981 free_domain_mem(domain); 1922 free_domain_mem(domain);
1982} 1923}
1983 1924
1925/*
1926 * Get the PASID directory size for scalable mode context entry.
1927 * Value of X in the PDTS field of a scalable mode context entry
1928 * indicates PASID directory with 2^(X + 7) entries.
1929 */
1930static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1931{
1932 int pds, max_pde;
1933
1934 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1935 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1936 if (pds < 7)
1937 return 0;
1938
1939 return pds - 7;
1940}
1941
1942/*
1943 * Set the RID_PASID field of a scalable mode context entry. The
1944 * IOMMU hardware will use the PASID value set in this field for
1945 * DMA translations of DMA requests without PASID.
1946 */
1947static inline void
1948context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1949{
1950 context->hi |= pasid & ((1 << 20) - 1);
1951 context->hi |= (1 << 20);
1952}
1953
1954/*
1955 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1956 * entry.
1957 */
1958static inline void context_set_sm_dte(struct context_entry *context)
1959{
1960 context->lo |= (1 << 2);
1961}
1962
1963/*
1964 * Set the PRE(Page Request Enable) field of a scalable mode context
1965 * entry.
1966 */
1967static inline void context_set_sm_pre(struct context_entry *context)
1968{
1969 context->lo |= (1 << 4);
1970}
1971
1972/* Convert value to context PASID directory size field coding. */
1973#define context_pdts(pds) (((pds) & 0x7) << 9)
1974
1984static int domain_context_mapping_one(struct dmar_domain *domain, 1975static int domain_context_mapping_one(struct dmar_domain *domain,
1985 struct intel_iommu *iommu, 1976 struct intel_iommu *iommu,
1977 struct pasid_table *table,
1986 u8 bus, u8 devfn) 1978 u8 bus, u8 devfn)
1987{ 1979{
1988 u16 did = domain->iommu_did[iommu->seq_id]; 1980 u16 did = domain->iommu_did[iommu->seq_id];
@@ -1990,8 +1982,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
1990 struct device_domain_info *info = NULL; 1982 struct device_domain_info *info = NULL;
1991 struct context_entry *context; 1983 struct context_entry *context;
1992 unsigned long flags; 1984 unsigned long flags;
1993 struct dma_pte *pgd; 1985 int ret;
1994 int ret, agaw;
1995 1986
1996 WARN_ON(did == 0); 1987 WARN_ON(did == 0);
1997 1988
@@ -2037,41 +2028,67 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
2037 } 2028 }
2038 } 2029 }
2039 2030
2040 pgd = domain->pgd;
2041
2042 context_clear_entry(context); 2031 context_clear_entry(context);
2043 context_set_domain_id(context, did);
2044 2032
2045 /* 2033 if (sm_supported(iommu)) {
2046 * Skip top levels of page tables for iommu which has less agaw 2034 unsigned long pds;
2047 * than default. Unnecessary for PT mode.
2048 */
2049 if (translation != CONTEXT_TT_PASS_THROUGH) {
2050 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
2051 ret = -ENOMEM;
2052 pgd = phys_to_virt(dma_pte_addr(pgd));
2053 if (!dma_pte_present(pgd))
2054 goto out_unlock;
2055 }
2056 2035
2057 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); 2036 WARN_ON(!table);
2058 if (info && info->ats_supported) 2037
2059 translation = CONTEXT_TT_DEV_IOTLB; 2038 /* Setup the PASID DIR pointer: */
2060 else 2039 pds = context_get_sm_pds(table);
2061 translation = CONTEXT_TT_MULTI_LEVEL; 2040 context->lo = (u64)virt_to_phys(table->table) |
2041 context_pdts(pds);
2042
2043 /* Setup the RID_PASID field: */
2044 context_set_sm_rid2pasid(context, PASID_RID2PASID);
2062 2045
2063 context_set_address_root(context, virt_to_phys(pgd));
2064 context_set_address_width(context, iommu->agaw);
2065 } else {
2066 /* 2046 /*
2067 * In pass through mode, AW must be programmed to 2047 * Setup the Device-TLB enable bit and Page request
2068 * indicate the largest AGAW value supported by 2048 * Enable bit:
2069 * hardware. And ASR is ignored by hardware.
2070 */ 2049 */
2071 context_set_address_width(context, iommu->msagaw); 2050 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2051 if (info && info->ats_supported)
2052 context_set_sm_dte(context);
2053 if (info && info->pri_supported)
2054 context_set_sm_pre(context);
2055 } else {
2056 struct dma_pte *pgd = domain->pgd;
2057 int agaw;
2058
2059 context_set_domain_id(context, did);
2060 context_set_translation_type(context, translation);
2061
2062 if (translation != CONTEXT_TT_PASS_THROUGH) {
2063 /*
2064 * Skip top levels of page tables for iommu which has
2065 * less agaw than default. Unnecessary for PT mode.
2066 */
2067 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2068 ret = -ENOMEM;
2069 pgd = phys_to_virt(dma_pte_addr(pgd));
2070 if (!dma_pte_present(pgd))
2071 goto out_unlock;
2072 }
2073
2074 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2075 if (info && info->ats_supported)
2076 translation = CONTEXT_TT_DEV_IOTLB;
2077 else
2078 translation = CONTEXT_TT_MULTI_LEVEL;
2079
2080 context_set_address_root(context, virt_to_phys(pgd));
2081 context_set_address_width(context, agaw);
2082 } else {
2083 /*
2084 * In pass through mode, AW must be programmed to
2085 * indicate the largest AGAW value supported by
2086 * hardware. And ASR is ignored by hardware.
2087 */
2088 context_set_address_width(context, iommu->msagaw);
2089 }
2072 } 2090 }
2073 2091
2074 context_set_translation_type(context, translation);
2075 context_set_fault_enable(context); 2092 context_set_fault_enable(context);
2076 context_set_present(context); 2093 context_set_present(context);
2077 domain_flush_cache(domain, context, sizeof(*context)); 2094 domain_flush_cache(domain, context, sizeof(*context));
@@ -2105,6 +2122,7 @@ out_unlock:
2105struct domain_context_mapping_data { 2122struct domain_context_mapping_data {
2106 struct dmar_domain *domain; 2123 struct dmar_domain *domain;
2107 struct intel_iommu *iommu; 2124 struct intel_iommu *iommu;
2125 struct pasid_table *table;
2108}; 2126};
2109 2127
2110static int domain_context_mapping_cb(struct pci_dev *pdev, 2128static int domain_context_mapping_cb(struct pci_dev *pdev,
@@ -2113,25 +2131,31 @@ static int domain_context_mapping_cb(struct pci_dev *pdev,
2113 struct domain_context_mapping_data *data = opaque; 2131 struct domain_context_mapping_data *data = opaque;
2114 2132
2115 return domain_context_mapping_one(data->domain, data->iommu, 2133 return domain_context_mapping_one(data->domain, data->iommu,
2116 PCI_BUS_NUM(alias), alias & 0xff); 2134 data->table, PCI_BUS_NUM(alias),
2135 alias & 0xff);
2117} 2136}
2118 2137
2119static int 2138static int
2120domain_context_mapping(struct dmar_domain *domain, struct device *dev) 2139domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2121{ 2140{
2141 struct domain_context_mapping_data data;
2142 struct pasid_table *table;
2122 struct intel_iommu *iommu; 2143 struct intel_iommu *iommu;
2123 u8 bus, devfn; 2144 u8 bus, devfn;
2124 struct domain_context_mapping_data data;
2125 2145
2126 iommu = device_to_iommu(dev, &bus, &devfn); 2146 iommu = device_to_iommu(dev, &bus, &devfn);
2127 if (!iommu) 2147 if (!iommu)
2128 return -ENODEV; 2148 return -ENODEV;
2129 2149
2150 table = intel_pasid_get_table(dev);
2151
2130 if (!dev_is_pci(dev)) 2152 if (!dev_is_pci(dev))
2131 return domain_context_mapping_one(domain, iommu, bus, devfn); 2153 return domain_context_mapping_one(domain, iommu, table,
2154 bus, devfn);
2132 2155
2133 data.domain = domain; 2156 data.domain = domain;
2134 data.iommu = iommu; 2157 data.iommu = iommu;
2158 data.table = table;
2135 2159
2136 return pci_for_each_dma_alias(to_pci_dev(dev), 2160 return pci_for_each_dma_alias(to_pci_dev(dev),
2137 &domain_context_mapping_cb, &data); 2161 &domain_context_mapping_cb, &data);
@@ -2467,8 +2491,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2467 dmar_find_matched_atsr_unit(pdev)) 2491 dmar_find_matched_atsr_unit(pdev))
2468 info->ats_supported = 1; 2492 info->ats_supported = 1;
2469 2493
2470 if (ecs_enabled(iommu)) { 2494 if (sm_supported(iommu)) {
2471 if (pasid_enabled(iommu)) { 2495 if (pasid_supported(iommu)) {
2472 int features = pci_pasid_features(pdev); 2496 int features = pci_pasid_features(pdev);
2473 if (features >= 0) 2497 if (features >= 0)
2474 info->pasid_supported = features | 1; 2498 info->pasid_supported = features | 1;
@@ -2514,16 +2538,34 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2514 list_add(&info->global, &device_domain_list); 2538 list_add(&info->global, &device_domain_list);
2515 if (dev) 2539 if (dev)
2516 dev->archdata.iommu = info; 2540 dev->archdata.iommu = info;
2541 spin_unlock_irqrestore(&device_domain_lock, flags);
2517 2542
2518 if (dev && dev_is_pci(dev) && info->pasid_supported) { 2543 /* PASID table is mandatory for a PCI device in scalable mode. */
2544 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
2519 ret = intel_pasid_alloc_table(dev); 2545 ret = intel_pasid_alloc_table(dev);
2520 if (ret) { 2546 if (ret) {
2521 pr_warn("No pasid table for %s, pasid disabled\n", 2547 pr_err("PASID table allocation for %s failed\n",
2522 dev_name(dev)); 2548 dev_name(dev));
2523 info->pasid_supported = 0; 2549 dmar_remove_one_dev_info(domain, dev);
2550 return NULL;
2551 }
2552
2553 /* Setup the PASID entry for requests without PASID: */
2554 spin_lock(&iommu->lock);
2555 if (hw_pass_through && domain_type_is_si(domain))
2556 ret = intel_pasid_setup_pass_through(iommu, domain,
2557 dev, PASID_RID2PASID);
2558 else
2559 ret = intel_pasid_setup_second_level(iommu, domain,
2560 dev, PASID_RID2PASID);
2561 spin_unlock(&iommu->lock);
2562 if (ret) {
2563 pr_err("Setup RID2PASID for %s failed\n",
2564 dev_name(dev));
2565 dmar_remove_one_dev_info(domain, dev);
2566 return NULL;
2524 } 2567 }
2525 } 2568 }
2526 spin_unlock_irqrestore(&device_domain_lock, flags);
2527 2569
2528 if (dev && domain_context_mapping(domain, dev)) { 2570 if (dev && domain_context_mapping(domain, dev)) {
2529 pr_err("Domain context map for %s failed\n", dev_name(dev)); 2571 pr_err("Domain context map for %s failed\n", dev_name(dev));
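
With scalable mode enabled, every PCI device gets a PASID table up front and DMA requests that carry no PASID are funneled through the reserved PASID 0 (PASID_RID2PASID), whose entry is programmed as pass-through for the identity domain and as second-level otherwise. A minimal user-space sketch of that choice, not kernel code; the helper and enum names are made up, and the PGTT values mirror what pasid_set_translation_type() is given later in intel-pasid.c:

#include <stdbool.h>
#include <stdio.h>

#define PASID_RID2PASID 0

/* PGTT encodings as handed to pasid_set_translation_type() in intel-pasid.c. */
enum pgtt {
	PGTT_FIRST_LEVEL  = 1,
	PGTT_SECOND_LEVEL = 2,
	PGTT_PASS_THROUGH = 4,
};

/* Mirrors the hw_pass_through && domain_type_is_si() test above. */
static enum pgtt rid2pasid_pgtt(bool hw_pass_through, bool identity_domain)
{
	if (hw_pass_through && identity_domain)
		return PGTT_PASS_THROUGH;	/* intel_pasid_setup_pass_through() */
	return PGTT_SECOND_LEVEL;		/* intel_pasid_setup_second_level() */
}

int main(void)
{
	printf("identity domain: PASID %d -> PGTT %d\n",
	       PASID_RID2PASID, rid2pasid_pgtt(true, true));
	printf("DMA domain:      PASID %d -> PGTT %d\n",
	       PASID_RID2PASID, rid2pasid_pgtt(true, false));
	return 0;
}

Built with any C99 compiler this prints PGTT 4 for the identity domain and PGTT 2 for a DMA domain.
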
@@ -3287,7 +3329,7 @@ static int __init init_dmars(void)
3287 * We need to ensure the system pasid table is no bigger 3329 * We need to ensure the system pasid table is no bigger
3288 * than the smallest supported. 3330 * than the smallest supported.
3289 */ 3331 */
3290 if (pasid_enabled(iommu)) { 3332 if (pasid_supported(iommu)) {
3291 u32 temp = 2 << ecap_pss(iommu->ecap); 3333 u32 temp = 2 << ecap_pss(iommu->ecap);
3292 3334
3293 intel_pasid_max_id = min_t(u32, temp, 3335 intel_pasid_max_id = min_t(u32, temp,
@@ -3348,7 +3390,7 @@ static int __init init_dmars(void)
3348 if (!ecap_pass_through(iommu->ecap)) 3390 if (!ecap_pass_through(iommu->ecap))
3349 hw_pass_through = 0; 3391 hw_pass_through = 0;
3350#ifdef CONFIG_INTEL_IOMMU_SVM 3392#ifdef CONFIG_INTEL_IOMMU_SVM
3351 if (pasid_enabled(iommu)) 3393 if (pasid_supported(iommu))
3352 intel_svm_init(iommu); 3394 intel_svm_init(iommu);
3353#endif 3395#endif
3354 } 3396 }
@@ -3452,7 +3494,7 @@ domains_done:
3452 iommu_flush_write_buffer(iommu); 3494 iommu_flush_write_buffer(iommu);
3453 3495
3454#ifdef CONFIG_INTEL_IOMMU_SVM 3496#ifdef CONFIG_INTEL_IOMMU_SVM
3455 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { 3497 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3456 ret = intel_svm_enable_prq(iommu); 3498 ret = intel_svm_enable_prq(iommu);
3457 if (ret) 3499 if (ret)
3458 goto free_iommu; 3500 goto free_iommu;
@@ -4335,7 +4377,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4335 goto out; 4377 goto out;
4336 4378
4337#ifdef CONFIG_INTEL_IOMMU_SVM 4379#ifdef CONFIG_INTEL_IOMMU_SVM
4338 if (pasid_enabled(iommu)) 4380 if (pasid_supported(iommu))
4339 intel_svm_init(iommu); 4381 intel_svm_init(iommu);
4340#endif 4382#endif
4341 4383
@@ -4352,7 +4394,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4352 iommu_flush_write_buffer(iommu); 4394 iommu_flush_write_buffer(iommu);
4353 4395
4354#ifdef CONFIG_INTEL_IOMMU_SVM 4396#ifdef CONFIG_INTEL_IOMMU_SVM
4355 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { 4397 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
4356 ret = intel_svm_enable_prq(iommu); 4398 ret = intel_svm_enable_prq(iommu);
4357 if (ret) 4399 if (ret)
4358 goto disable_iommu; 4400 goto disable_iommu;
@@ -4927,6 +4969,10 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4927 iommu = info->iommu; 4969 iommu = info->iommu;
4928 4970
4929 if (info->dev) { 4971 if (info->dev) {
4972 if (dev_is_pci(info->dev) && sm_supported(iommu))
4973 intel_pasid_tear_down_entry(iommu, info->dev,
4974 PASID_RID2PASID);
4975
4930 iommu_disable_dev_iotlb(info); 4976 iommu_disable_dev_iotlb(info);
4931 domain_context_clear(iommu, info->dev); 4977 domain_context_clear(iommu, info->dev);
4932 intel_pasid_free_table(info->dev); 4978 intel_pasid_free_table(info->dev);
@@ -5254,19 +5300,6 @@ static void intel_iommu_put_resv_regions(struct device *dev,
5254} 5300}
5255 5301
5256#ifdef CONFIG_INTEL_IOMMU_SVM 5302#ifdef CONFIG_INTEL_IOMMU_SVM
5257#define MAX_NR_PASID_BITS (20)
5258static inline unsigned long intel_iommu_get_pts(struct device *dev)
5259{
5260 int pts, max_pasid;
5261
5262 max_pasid = intel_pasid_get_dev_max_id(dev);
5263 pts = find_first_bit((unsigned long *)&max_pasid, MAX_NR_PASID_BITS);
5264 if (pts < 5)
5265 return 0;
5266
5267 return pts - 5;
5268}
5269
5270int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) 5303int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5271{ 5304{
5272 struct device_domain_info *info; 5305 struct device_domain_info *info;
@@ -5298,33 +5331,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
5298 sdev->sid = PCI_DEVID(info->bus, info->devfn); 5331 sdev->sid = PCI_DEVID(info->bus, info->devfn);
5299 5332
5300 if (!(ctx_lo & CONTEXT_PASIDE)) { 5333 if (!(ctx_lo & CONTEXT_PASIDE)) {
5301 if (iommu->pasid_state_table)
5302 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
5303 context[1].lo = (u64)virt_to_phys(info->pasid_table->table) |
5304 intel_iommu_get_pts(sdev->dev);
5305
5306 wmb();
5307 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5308 * extended to permit requests-with-PASID if the PASIDE bit
5309 * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
5310 * however, the PASIDE bit is ignored and requests-with-PASID
5311 * are unconditionally blocked. Which makes less sense.
5312 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5313 * "guest mode" translation types depending on whether ATS
5314 * is available or not. Annoyingly, we can't use the new
5315 * modes *unless* PASIDE is set. */
5316 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5317 ctx_lo &= ~CONTEXT_TT_MASK;
5318 if (info->ats_supported)
5319 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5320 else
5321 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5322 }
5323 ctx_lo |= CONTEXT_PASIDE; 5334 ctx_lo |= CONTEXT_PASIDE;
5324 if (iommu->pasid_state_table)
5325 ctx_lo |= CONTEXT_DINVE;
5326 if (info->pri_supported)
5327 ctx_lo |= CONTEXT_PRS;
5328 context[0].lo = ctx_lo; 5335 context[0].lo = ctx_lo;
5329 wmb(); 5336 wmb();
5330 iommu->flush.flush_context(iommu, sdev->did, sdev->sid, 5337 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
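
That is the end of the intel-iommu.c changes: with scalable mode, the context entry no longer carries the PASID table pointer or per-device translation-type tweaks; it refers to a two-level PASID structure and each 512-bit PASID entry selects its own translation. A simplified, stand-alone model of that lookup follows. Only PASID_PDE_SHIFT, PASID_PTE_MASK and the 512-bit entry width are taken from intel-pasid.h; the structure and helper names and the flat directory array are illustrative, not the kernel layout:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PASID_PDE_SHIFT	6
#define PASID_PTE_MASK	0x3f

struct sm_pasid_entry { uint64_t val[8]; };	/* 512-bit leaf entry, as in intel-pasid.h */

/* Simplified: a flat array of leaf-table pointers stands in for the directory. */
struct sm_context {
	struct sm_pasid_entry *leaf[1 << (20 - PASID_PDE_SHIFT)];
};

static struct sm_pasid_entry *sm_lookup(struct sm_context *ctx, unsigned int pasid)
{
	unsigned int dir_index = pasid >> PASID_PDE_SHIFT;
	unsigned int index = pasid & PASID_PTE_MASK;

	if (!ctx->leaf[dir_index])		/* leaf tables appear on first use */
		ctx->leaf[dir_index] = calloc(64, sizeof(struct sm_pasid_entry));
	return ctx->leaf[dir_index] ? &ctx->leaf[dir_index][index] : NULL;
}

int main(void)
{
	static struct sm_context ctx;
	unsigned int pasid = 0x12345;

	printf("PASID %#x -> directory slot %u, table slot %u, entry %p\n",
	       pasid, pasid >> PASID_PDE_SHIFT, pasid & PASID_PTE_MASK,
	       (void *)sm_lookup(&ctx, pasid));
	return 0;
}

Each leaf table holds 64 of the 64-byte entries, so one leaf occupies exactly one 4 KiB page.
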
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index fe95c9bd4d33..53fe5248d8f1 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -9,6 +9,8 @@
9 9
10#define pr_fmt(fmt) "DMAR: " fmt 10#define pr_fmt(fmt) "DMAR: " fmt
11 11
12#include <linux/bitops.h>
13#include <linux/cpufeature.h>
12#include <linux/dmar.h> 14#include <linux/dmar.h>
13#include <linux/intel-iommu.h> 15#include <linux/intel-iommu.h>
14#include <linux/iommu.h> 16#include <linux/iommu.h>
@@ -123,12 +125,13 @@ int intel_pasid_alloc_table(struct device *dev)
123 struct pasid_table *pasid_table; 125 struct pasid_table *pasid_table;
124 struct pasid_table_opaque data; 126 struct pasid_table_opaque data;
125 struct page *pages; 127 struct page *pages;
126 size_t size, count; 128 int max_pasid = 0;
127 int ret, order; 129 int ret, order;
130 int size;
128 131
132 might_sleep();
129 info = dev->archdata.iommu; 133 info = dev->archdata.iommu;
130 if (WARN_ON(!info || !dev_is_pci(dev) || 134 if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
131 !info->pasid_supported || info->pasid_table))
132 return -EINVAL; 135 return -EINVAL;
133 136
134 /* DMA alias device already has a pasid table, use it: */ 137 /* DMA alias device already has a pasid table, use it: */
@@ -138,23 +141,25 @@ int intel_pasid_alloc_table(struct device *dev)
138 if (ret) 141 if (ret)
139 goto attach_out; 142 goto attach_out;
140 143
141 pasid_table = kzalloc(sizeof(*pasid_table), GFP_ATOMIC); 144 pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
142 if (!pasid_table) 145 if (!pasid_table)
143 return -ENOMEM; 146 return -ENOMEM;
144 INIT_LIST_HEAD(&pasid_table->dev); 147 INIT_LIST_HEAD(&pasid_table->dev);
145 148
146 size = sizeof(struct pasid_entry); 149 if (info->pasid_supported)
147 count = min_t(int, pci_max_pasids(to_pci_dev(dev)), intel_pasid_max_id); 150 max_pasid = min_t(int, pci_max_pasids(to_pci_dev(dev)),
148 order = get_order(size * count); 151 intel_pasid_max_id);
152
153 size = max_pasid >> (PASID_PDE_SHIFT - 3);
154 order = size ? get_order(size) : 0;
149 pages = alloc_pages_node(info->iommu->node, 155 pages = alloc_pages_node(info->iommu->node,
150 GFP_ATOMIC | __GFP_ZERO, 156 GFP_KERNEL | __GFP_ZERO, order);
151 order);
152 if (!pages) 157 if (!pages)
153 return -ENOMEM; 158 return -ENOMEM;
154 159
155 pasid_table->table = page_address(pages); 160 pasid_table->table = page_address(pages);
156 pasid_table->order = order; 161 pasid_table->order = order;
157 pasid_table->max_pasid = count; 162 pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
158 163
159attach_out: 164attach_out:
160 device_attach_pasid_table(info, pasid_table); 165 device_attach_pasid_table(info, pasid_table);
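
The table sized here is only the PASID directory: one 8-byte directory entry covers 64 PASIDs (PASID_PDE_SHIFT = 6), so the byte size is max_pasid >> 3, and max_pasid is then re-derived from the rounded-up allocation order. A worked check of that arithmetic, assuming 4 KiB pages; get_order() below is a rough user-space stand-in for the kernel helper:

#include <stdio.h>

#define PAGE_SHIFT	12	/* assumption: 4 KiB pages */
#define PASID_PDE_SHIFT	6

/* Rough user-space stand-in for the kernel's get_order(). */
static int get_order(unsigned long size)
{
	int order = 0;

	while ((1UL << (order + PAGE_SHIFT)) < size)
		order++;
	return order;
}

int main(void)
{
	unsigned long max_pasid = 1UL << 20;	/* PASID_MAX, full 20-bit space */
	unsigned long size = max_pasid >> (PASID_PDE_SHIFT - 3); /* directory bytes */
	int order = size ? get_order(size) : 0;

	printf("directory: %lu KiB, order %d\n", size >> 10, order);
	printf("max_pasid re-derived: 1 << %d = %lu\n",
	       order + PAGE_SHIFT + 3, 1UL << (order + PAGE_SHIFT + 3));
	return 0;
}

For the full 20-bit PASID space this prints a 128 KiB directory at order 5, and 1 << 20 once the order is folded back into max_pasid.
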
@@ -162,14 +167,33 @@ attach_out:
162 return 0; 167 return 0;
163} 168}
164 169
170/* Get PRESENT bit of a PASID directory entry. */
171static inline bool
172pasid_pde_is_present(struct pasid_dir_entry *pde)
173{
174 return READ_ONCE(pde->val) & PASID_PTE_PRESENT;
175}
176
177/* Get PASID table from a PASID directory entry. */
178static inline struct pasid_entry *
179get_pasid_table_from_pde(struct pasid_dir_entry *pde)
180{
181 if (!pasid_pde_is_present(pde))
182 return NULL;
183
184 return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK);
185}
186
165void intel_pasid_free_table(struct device *dev) 187void intel_pasid_free_table(struct device *dev)
166{ 188{
167 struct device_domain_info *info; 189 struct device_domain_info *info;
168 struct pasid_table *pasid_table; 190 struct pasid_table *pasid_table;
191 struct pasid_dir_entry *dir;
192 struct pasid_entry *table;
193 int i, max_pde;
169 194
170 info = dev->archdata.iommu; 195 info = dev->archdata.iommu;
171 if (!info || !dev_is_pci(dev) || 196 if (!info || !dev_is_pci(dev) || !info->pasid_table)
172 !info->pasid_supported || !info->pasid_table)
173 return; 197 return;
174 198
175 pasid_table = info->pasid_table; 199 pasid_table = info->pasid_table;
@@ -178,6 +202,14 @@ void intel_pasid_free_table(struct device *dev)
178 if (!list_empty(&pasid_table->dev)) 202 if (!list_empty(&pasid_table->dev))
179 return; 203 return;
180 204
205 /* Free scalable mode PASID directory tables: */
206 dir = pasid_table->table;
207 max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
208 for (i = 0; i < max_pde; i++) {
209 table = get_pasid_table_from_pde(&dir[i]);
210 free_pgtable_page(table);
211 }
212
181 free_pages((unsigned long)pasid_table->table, pasid_table->order); 213 free_pages((unsigned long)pasid_table->table, pasid_table->order);
182 kfree(pasid_table); 214 kfree(pasid_table);
183} 215}
@@ -206,17 +238,37 @@ int intel_pasid_get_dev_max_id(struct device *dev)
206 238
207struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) 239struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
208{ 240{
241 struct device_domain_info *info;
209 struct pasid_table *pasid_table; 242 struct pasid_table *pasid_table;
243 struct pasid_dir_entry *dir;
210 struct pasid_entry *entries; 244 struct pasid_entry *entries;
245 int dir_index, index;
211 246
212 pasid_table = intel_pasid_get_table(dev); 247 pasid_table = intel_pasid_get_table(dev);
213 if (WARN_ON(!pasid_table || pasid < 0 || 248 if (WARN_ON(!pasid_table || pasid < 0 ||
214 pasid >= intel_pasid_get_dev_max_id(dev))) 249 pasid >= intel_pasid_get_dev_max_id(dev)))
215 return NULL; 250 return NULL;
216 251
217 entries = pasid_table->table; 252 dir = pasid_table->table;
253 info = dev->archdata.iommu;
254 dir_index = pasid >> PASID_PDE_SHIFT;
255 index = pasid & PASID_PTE_MASK;
256
257 spin_lock(&pasid_lock);
258 entries = get_pasid_table_from_pde(&dir[dir_index]);
259 if (!entries) {
260 entries = alloc_pgtable_page(info->iommu->node);
261 if (!entries) {
262 spin_unlock(&pasid_lock);
263 return NULL;
264 }
265
266 WRITE_ONCE(dir[dir_index].val,
267 (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
268 }
269 spin_unlock(&pasid_lock);
218 270
219 return &entries[pasid]; 271 return &entries[index];
220} 272}
221 273
222/* 274/*
@@ -224,10 +276,17 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
224 */ 276 */
225static inline void pasid_clear_entry(struct pasid_entry *pe) 277static inline void pasid_clear_entry(struct pasid_entry *pe)
226{ 278{
227 WRITE_ONCE(pe->val, 0); 279 WRITE_ONCE(pe->val[0], 0);
280 WRITE_ONCE(pe->val[1], 0);
281 WRITE_ONCE(pe->val[2], 0);
282 WRITE_ONCE(pe->val[3], 0);
283 WRITE_ONCE(pe->val[4], 0);
284 WRITE_ONCE(pe->val[5], 0);
285 WRITE_ONCE(pe->val[6], 0);
286 WRITE_ONCE(pe->val[7], 0);
228} 287}
229 288
230void intel_pasid_clear_entry(struct device *dev, int pasid) 289static void intel_pasid_clear_entry(struct device *dev, int pasid)
231{ 290{
232 struct pasid_entry *pe; 291 struct pasid_entry *pe;
233 292
@@ -237,3 +296,361 @@ void intel_pasid_clear_entry(struct device *dev, int pasid)
237 296
238 pasid_clear_entry(pe); 297 pasid_clear_entry(pe);
239} 298}
299
300static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
301{
302 u64 old;
303
304 old = READ_ONCE(*ptr);
305 WRITE_ONCE(*ptr, (old & ~mask) | bits);
306}
307
308/*
309 * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
310 * PASID entry.
311 */
312static inline void
313pasid_set_domain_id(struct pasid_entry *pe, u64 value)
314{
315 pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
316}
317
318/*
319 * Get domain ID value of a scalable mode PASID entry.
320 */
321static inline u16
322pasid_get_domain_id(struct pasid_entry *pe)
323{
324 return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0));
325}
326
327/*
328 * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
329 * of a scalable mode PASID entry.
330 */
331static inline void
332pasid_set_slptr(struct pasid_entry *pe, u64 value)
333{
334 pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
335}
336
337/*
338 * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
339 * entry.
340 */
341static inline void
342pasid_set_address_width(struct pasid_entry *pe, u64 value)
343{
344 pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
345}
346
347/*
348 * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8)
349 * of a scalable mode PASID entry.
350 */
351static inline void
352pasid_set_translation_type(struct pasid_entry *pe, u64 value)
353{
354 pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
355}
356
357/*
358 * Enable fault processing by clearing the FPD(Fault Processing
359 * Disable) field (Bit 1) of a scalable mode PASID entry.
360 */
361static inline void pasid_set_fault_enable(struct pasid_entry *pe)
362{
363 pasid_set_bits(&pe->val[0], 1 << 1, 0);
364}
365
366/*
367 * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
368 * scalable mode PASID entry.
369 */
370static inline void pasid_set_sre(struct pasid_entry *pe)
371{
372 pasid_set_bits(&pe->val[2], 1 << 0, 1);
373}
374
375/*
376 * Setup the P(Present) field (Bit 0) of a scalable mode PASID
377 * entry.
378 */
379static inline void pasid_set_present(struct pasid_entry *pe)
380{
381 pasid_set_bits(&pe->val[0], 1 << 0, 1);
382}
383
384/*
385 * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
386 * entry.
387 */
388static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
389{
390 pasid_set_bits(&pe->val[1], 1 << 23, value);
391}
392
393/*
394 * Setup the First Level Page table Pointer field (Bit 140~191)
395 * of a scalable mode PASID entry.
396 */
397static inline void
398pasid_set_flptr(struct pasid_entry *pe, u64 value)
399{
400 pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value);
401}
402
403/*
404 * Setup the First Level Paging Mode field (Bit 130~131) of a
405 * scalable mode PASID entry.
406 */
407static inline void
408pasid_set_flpm(struct pasid_entry *pe, u64 value)
409{
410 pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
411}
412
413static void
414pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
415 u16 did, int pasid)
416{
417 struct qi_desc desc;
418
419 desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
420 desc.qw1 = 0;
421 desc.qw2 = 0;
422 desc.qw3 = 0;
423
424 qi_submit_sync(&desc, iommu);
425}
426
427static void
428iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid)
429{
430 struct qi_desc desc;
431
432 desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
433 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
434 desc.qw1 = 0;
435 desc.qw2 = 0;
436 desc.qw3 = 0;
437
438 qi_submit_sync(&desc, iommu);
439}
440
441static void
442devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
443 struct device *dev, int pasid)
444{
445 struct device_domain_info *info;
446 u16 sid, qdep, pfsid;
447
448 info = dev->archdata.iommu;
449 if (!info || !info->ats_enabled)
450 return;
451
452 sid = info->bus << 8 | info->devfn;
453 qdep = info->ats_qdep;
454 pfsid = info->pfsid;
455
456 qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
457}
458
459void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
460 struct device *dev, int pasid)
461{
462 struct pasid_entry *pte;
463 u16 did;
464
465 pte = intel_pasid_get_entry(dev, pasid);
466 if (WARN_ON(!pte))
467 return;
468
469 intel_pasid_clear_entry(dev, pasid);
470 did = pasid_get_domain_id(pte);
471
472 if (!ecap_coherent(iommu->ecap))
473 clflush_cache_range(pte, sizeof(*pte));
474
475 pasid_cache_invalidation_with_pasid(iommu, did, pasid);
476 iotlb_invalidation_with_pasid(iommu, did, pasid);
477
478 /* Device IOTLB doesn't need to be flushed in caching mode. */
479 if (!cap_caching_mode(iommu->cap))
480 devtlb_invalidation_with_pasid(iommu, dev, pasid);
481}
482
483/*
484 * Set up the scalable mode pasid table entry for first only
485 * translation type.
486 */
487int intel_pasid_setup_first_level(struct intel_iommu *iommu,
488 struct device *dev, pgd_t *pgd,
489 int pasid, u16 did, int flags)
490{
491 struct pasid_entry *pte;
492
493 if (!ecap_flts(iommu->ecap)) {
494 pr_err("No first level translation support on %s\n",
495 iommu->name);
496 return -EINVAL;
497 }
498
499 pte = intel_pasid_get_entry(dev, pasid);
500 if (WARN_ON(!pte))
501 return -EINVAL;
502
503 pasid_clear_entry(pte);
504
505 /* Setup the first level page table pointer: */
506 pasid_set_flptr(pte, (u64)__pa(pgd));
507 if (flags & PASID_FLAG_SUPERVISOR_MODE) {
508 if (!ecap_srs(iommu->ecap)) {
509 pr_err("No supervisor request support on %s\n",
510 iommu->name);
511 return -EINVAL;
512 }
513 pasid_set_sre(pte);
514 }
515
516#ifdef CONFIG_X86
517 if (cpu_feature_enabled(X86_FEATURE_LA57))
518 pasid_set_flpm(pte, 1);
519#endif /* CONFIG_X86 */
520
521 pasid_set_domain_id(pte, did);
522 pasid_set_address_width(pte, iommu->agaw);
523 pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
524
525 /* Setup Present and PASID Granular Transfer Type: */
526 pasid_set_translation_type(pte, 1);
527 pasid_set_present(pte);
528
529 if (!ecap_coherent(iommu->ecap))
530 clflush_cache_range(pte, sizeof(*pte));
531
532 if (cap_caching_mode(iommu->cap)) {
533 pasid_cache_invalidation_with_pasid(iommu, did, pasid);
534 iotlb_invalidation_with_pasid(iommu, did, pasid);
535 } else {
536 iommu_flush_write_buffer(iommu);
537 }
538
539 return 0;
540}
541
542/*
543 * Set up the scalable mode pasid entry for second only translation type.
544 */
545int intel_pasid_setup_second_level(struct intel_iommu *iommu,
546 struct dmar_domain *domain,
547 struct device *dev, int pasid)
548{
549 struct pasid_entry *pte;
550 struct dma_pte *pgd;
551 u64 pgd_val;
552 int agaw;
553 u16 did;
554
555 /*
556 * If hardware advertises no support for second level
557 * translation, return directly.
558 */
559 if (!ecap_slts(iommu->ecap)) {
560 pr_err("No second level translation support on %s\n",
561 iommu->name);
562 return -EINVAL;
563 }
564
565 /*
566 * Skip top levels of page tables for iommu which has less agaw
567 * than default. Unnecessary for PT mode.
568 */
569 pgd = domain->pgd;
570 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
571 pgd = phys_to_virt(dma_pte_addr(pgd));
572 if (!dma_pte_present(pgd)) {
573 dev_err(dev, "Invalid domain page table\n");
574 return -EINVAL;
575 }
576 }
577
578 pgd_val = virt_to_phys(pgd);
579 did = domain->iommu_did[iommu->seq_id];
580
581 pte = intel_pasid_get_entry(dev, pasid);
582 if (!pte) {
583 dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
584 return -ENODEV;
585 }
586
587 pasid_clear_entry(pte);
588 pasid_set_domain_id(pte, did);
589 pasid_set_slptr(pte, pgd_val);
590 pasid_set_address_width(pte, agaw);
591 pasid_set_translation_type(pte, 2);
592 pasid_set_fault_enable(pte);
593 pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
594
595 /*
596 * Since it is a second level only translation setup, we should
597 * set SRE bit as well (addresses are expected to be GPAs).
598 */
599 pasid_set_sre(pte);
600 pasid_set_present(pte);
601
602 if (!ecap_coherent(iommu->ecap))
603 clflush_cache_range(pte, sizeof(*pte));
604
605 if (cap_caching_mode(iommu->cap)) {
606 pasid_cache_invalidation_with_pasid(iommu, did, pasid);
607 iotlb_invalidation_with_pasid(iommu, did, pasid);
608 } else {
609 iommu_flush_write_buffer(iommu);
610 }
611
612 return 0;
613}
614
615/*
616 * Set up the scalable mode pasid entry for passthrough translation type.
617 */
618int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
619 struct dmar_domain *domain,
620 struct device *dev, int pasid)
621{
622 u16 did = FLPT_DEFAULT_DID;
623 struct pasid_entry *pte;
624
625 pte = intel_pasid_get_entry(dev, pasid);
626 if (!pte) {
627 dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
628 return -ENODEV;
629 }
630
631 pasid_clear_entry(pte);
632 pasid_set_domain_id(pte, did);
633 pasid_set_address_width(pte, iommu->agaw);
634 pasid_set_translation_type(pte, 4);
635 pasid_set_fault_enable(pte);
636 pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
637
638 /*
639 * We should set SRE bit as well since the addresses are expected
640 * to be GPAs.
641 */
642 pasid_set_sre(pte);
643 pasid_set_present(pte);
644
645 if (!ecap_coherent(iommu->ecap))
646 clflush_cache_range(pte, sizeof(*pte));
647
648 if (cap_caching_mode(iommu->cap)) {
649 pasid_cache_invalidation_with_pasid(iommu, did, pasid);
650 iotlb_invalidation_with_pasid(iommu, did, pasid);
651 } else {
652 iommu_flush_write_buffer(iommu);
653 }
654
655 return 0;
656}
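
That closes intel-pasid.c. The helpers above all address fields by (val[] word, bit within word), while the comments quote absolute bit positions within the 512-bit entry, so bit N lands in val[N / 64] at position N % 64: DID bits 64-79 become val[1] bits 0-15, SRE bit 128 becomes val[2] bit 0, FLPTR bits 140-191 become val[2] bits 12-63, and no field crosses a word boundary. A small self-contained check of that mapping, using the same read-modify-write pattern as pasid_set_bits(); set_field() and the test values are illustrative only:

#include <stdint.h>
#include <stdio.h>

struct sm_entry { uint64_t val[8]; };	/* mirrors the 8 x 64-bit entry layout */

static void set_bits(uint64_t *ptr, uint64_t mask, uint64_t bits)
{
	*ptr = (*ptr & ~mask) | (bits & mask);	/* same pattern as pasid_set_bits() */
}

/* Absolute bits lo..hi of the 512-bit entry -> word lo/64, bits (lo%64)... */
static void set_field(struct sm_entry *pe, int lo, int hi, uint64_t value)
{
	int word = lo / 64, shift = lo % 64;
	uint64_t mask = (hi - lo == 63) ? ~0ULL
			: (((1ULL << (hi - lo + 1)) - 1) << shift);

	set_bits(&pe->val[word], mask, value << shift);
}

int main(void)
{
	struct sm_entry pe = { { 0 } };

	set_field(&pe, 64, 79, 0xabcd);	/* DID  -> val[1] bits 0-15, as pasid_set_domain_id() */
	set_field(&pe, 6, 8, 2);	/* PGTT -> val[0] bits 6-8, second-level           */
	set_field(&pe, 128, 128, 1);	/* SRE  -> val[2] bit 0, as pasid_set_sre()         */

	printf("val[0]=%#llx val[1]=%#llx val[2]=%#llx\n",
	       (unsigned long long)pe.val[0],
	       (unsigned long long)pe.val[1],
	       (unsigned long long)pe.val[2]);
	return 0;
}

The output (val[0]=0x80, val[1]=0xabcd, val[2]=0x1) matches what the GENMASK_ULL()-based setters above would produce for the same values.
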
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
index 1fb5e12b029a..23537b3f34e3 100644
--- a/drivers/iommu/intel-pasid.h
+++ b/drivers/iommu/intel-pasid.h
@@ -10,13 +10,37 @@
10#ifndef __INTEL_PASID_H 10#ifndef __INTEL_PASID_H
11#define __INTEL_PASID_H 11#define __INTEL_PASID_H
12 12
13#define PASID_RID2PASID 0x0
13#define PASID_MIN 0x1 14#define PASID_MIN 0x1
14#define PASID_MAX 0x20000 15#define PASID_MAX 0x100000
16#define PASID_PTE_MASK 0x3F
17#define PASID_PTE_PRESENT 1
18#define PDE_PFN_MASK PAGE_MASK
19#define PASID_PDE_SHIFT 6
20#define MAX_NR_PASID_BITS 20
15 21
16struct pasid_entry { 22/*
23 * Domain ID reserved for pasid entries programmed for first-level
24 * only and pass-through transfer modes.
25 */
26#define FLPT_DEFAULT_DID 1
27
28/*
29 * The SUPERVISOR_MODE flag indicates a first level translation which
30 * can be used for access to kernel addresses. It is valid only for
31 * access to the kernel's static 1:1 mapping of physical memory — not
32 * to vmalloc or even module mappings.
33 */
34#define PASID_FLAG_SUPERVISOR_MODE BIT(0)
35
36struct pasid_dir_entry {
17 u64 val; 37 u64 val;
18}; 38};
19 39
40struct pasid_entry {
41 u64 val[8];
42};
43
20/* The representative of a PASID table */ 44/* The representative of a PASID table */
21struct pasid_table { 45struct pasid_table {
22 void *table; /* pasid table pointer */ 46 void *table; /* pasid table pointer */
@@ -34,6 +58,16 @@ void intel_pasid_free_table(struct device *dev);
34struct pasid_table *intel_pasid_get_table(struct device *dev); 58struct pasid_table *intel_pasid_get_table(struct device *dev);
35int intel_pasid_get_dev_max_id(struct device *dev); 59int intel_pasid_get_dev_max_id(struct device *dev);
36struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid); 60struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid);
37void intel_pasid_clear_entry(struct device *dev, int pasid); 61int intel_pasid_setup_first_level(struct intel_iommu *iommu,
62 struct device *dev, pgd_t *pgd,
63 int pasid, u16 did, int flags);
64int intel_pasid_setup_second_level(struct intel_iommu *iommu,
65 struct dmar_domain *domain,
66 struct device *dev, int pasid);
67int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
68 struct dmar_domain *domain,
69 struct device *dev, int pasid);
70void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
71 struct device *dev, int pasid);
38 72
39#endif /* __INTEL_PASID_H */ 73#endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 887150907526..a2a2aa4439aa 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -29,21 +29,10 @@
29 29
30#include "intel-pasid.h" 30#include "intel-pasid.h"
31 31
32#define PASID_ENTRY_P BIT_ULL(0)
33#define PASID_ENTRY_FLPM_5LP BIT_ULL(9)
34#define PASID_ENTRY_SRE BIT_ULL(11)
35
36static irqreturn_t prq_event_thread(int irq, void *d); 32static irqreturn_t prq_event_thread(int irq, void *d);
37 33
38struct pasid_state_entry {
39 u64 val;
40};
41
42int intel_svm_init(struct intel_iommu *iommu) 34int intel_svm_init(struct intel_iommu *iommu)
43{ 35{
44 struct page *pages;
45 int order;
46
47 if (cpu_feature_enabled(X86_FEATURE_GBPAGES) && 36 if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
48 !cap_fl1gp_support(iommu->cap)) 37 !cap_fl1gp_support(iommu->cap))
49 return -EINVAL; 38 return -EINVAL;
@@ -52,41 +41,6 @@ int intel_svm_init(struct intel_iommu *iommu)
52 !cap_5lp_support(iommu->cap)) 41 !cap_5lp_support(iommu->cap))
53 return -EINVAL; 42 return -EINVAL;
54 43
55 /* Start at 2 because it's defined as 2^(1+PSS) */
56 iommu->pasid_max = 2 << ecap_pss(iommu->ecap);
57
58 /* Eventually I'm promised we will get a multi-level PASID table
59 * and it won't have to be physically contiguous. Until then,
60 * limit the size because 8MiB contiguous allocations can be hard
61 * to come by. The limit of 0x20000, which is 1MiB for each of
62 * the PASID and PASID-state tables, is somewhat arbitrary. */
63 if (iommu->pasid_max > 0x20000)
64 iommu->pasid_max = 0x20000;
65
66 order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
67 if (ecap_dis(iommu->ecap)) {
68 /* Just making it explicit... */
69 BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
70 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
71 if (pages)
72 iommu->pasid_state_table = page_address(pages);
73 else
74 pr_warn("IOMMU: %s: Failed to allocate PASID state table\n",
75 iommu->name);
76 }
77
78 return 0;
79}
80
81int intel_svm_exit(struct intel_iommu *iommu)
82{
83 int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
84
85 if (iommu->pasid_state_table) {
86 free_pages((unsigned long)iommu->pasid_state_table, order);
87 iommu->pasid_state_table = NULL;
88 }
89
90 return 0; 44 return 0;
91} 45}
92 46
@@ -163,27 +117,40 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
163 * because that's the only option the hardware gives us. Despite 117 * because that's the only option the hardware gives us. Despite
164 * the fact that they are actually only accessible through one. */ 118 * the fact that they are actually only accessible through one. */
165 if (gl) 119 if (gl)
166 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | 120 desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
167 QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | QI_EIOTLB_TYPE; 121 QI_EIOTLB_DID(sdev->did) |
122 QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) |
123 QI_EIOTLB_TYPE;
168 else 124 else
169 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | 125 desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
170 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; 126 QI_EIOTLB_DID(sdev->did) |
171 desc.high = 0; 127 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
128 QI_EIOTLB_TYPE;
129 desc.qw1 = 0;
172 } else { 130 } else {
173 int mask = ilog2(__roundup_pow_of_two(pages)); 131 int mask = ilog2(__roundup_pow_of_two(pages));
174 132
175 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | 133 desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
176 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE; 134 QI_EIOTLB_DID(sdev->did) |
177 desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) | 135 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
178 QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask); 136 QI_EIOTLB_TYPE;
137 desc.qw1 = QI_EIOTLB_ADDR(address) |
138 QI_EIOTLB_GL(gl) |
139 QI_EIOTLB_IH(ih) |
140 QI_EIOTLB_AM(mask);
179 } 141 }
142 desc.qw2 = 0;
143 desc.qw3 = 0;
180 qi_submit_sync(&desc, svm->iommu); 144 qi_submit_sync(&desc, svm->iommu);
181 145
182 if (sdev->dev_iotlb) { 146 if (sdev->dev_iotlb) {
183 desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) | 147 desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) |
184 QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE; 148 QI_DEV_EIOTLB_SID(sdev->sid) |
149 QI_DEV_EIOTLB_QDEP(sdev->qdep) |
150 QI_DEIOTLB_TYPE;
185 if (pages == -1) { 151 if (pages == -1) {
186 desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | QI_DEV_EIOTLB_SIZE; 152 desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) |
153 QI_DEV_EIOTLB_SIZE;
187 } else if (pages > 1) { 154 } else if (pages > 1) {
188 /* The least significant zero bit indicates the size. So, 155 /* The least significant zero bit indicates the size. So,
189 * for example, an "address" value of 0x12345f000 will 156 * for example, an "address" value of 0x12345f000 will
@@ -191,10 +158,13 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
191 unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT); 158 unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
192 unsigned long mask = __rounddown_pow_of_two(address ^ last); 159 unsigned long mask = __rounddown_pow_of_two(address ^ last);
193 160
194 desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE; 161 desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) |
162 (mask - 1)) | QI_DEV_EIOTLB_SIZE;
195 } else { 163 } else {
196 desc.high = QI_DEV_EIOTLB_ADDR(address); 164 desc.qw1 = QI_DEV_EIOTLB_ADDR(address);
197 } 165 }
166 desc.qw2 = 0;
167 desc.qw3 = 0;
198 qi_submit_sync(&desc, svm->iommu); 168 qi_submit_sync(&desc, svm->iommu);
199 } 169 }
200} 170}
@@ -204,11 +174,6 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
204{ 174{
205 struct intel_svm_dev *sdev; 175 struct intel_svm_dev *sdev;
206 176
207 /* Try deferred invalidate if available */
208 if (svm->iommu->pasid_state_table &&
209 !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63))
210 return;
211
212 rcu_read_lock(); 177 rcu_read_lock();
213 list_for_each_entry_rcu(sdev, &svm->devs, list) 178 list_for_each_entry_rcu(sdev, &svm->devs, list)
214 intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl); 179 intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl);
@@ -234,17 +199,6 @@ static void intel_invalidate_range(struct mmu_notifier *mn,
234 (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0); 199 (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0);
235} 200}
236 201
237
238static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid)
239{
240 struct qi_desc desc;
241
242 desc.high = 0;
243 desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
244
245 qi_submit_sync(&desc, svm->iommu);
246}
247
248static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) 202static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
249{ 203{
250 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); 204 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
@@ -264,8 +218,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
264 */ 218 */
265 rcu_read_lock(); 219 rcu_read_lock();
266 list_for_each_entry_rcu(sdev, &svm->devs, list) { 220 list_for_each_entry_rcu(sdev, &svm->devs, list) {
267 intel_pasid_clear_entry(sdev->dev, svm->pasid); 221 intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
268 intel_flush_pasid_dev(svm, sdev, svm->pasid);
269 intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); 222 intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
270 } 223 }
271 rcu_read_unlock(); 224 rcu_read_unlock();
@@ -284,11 +237,9 @@ static LIST_HEAD(global_svm_list);
284int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) 237int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
285{ 238{
286 struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); 239 struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
287 struct pasid_entry *entry;
288 struct intel_svm_dev *sdev; 240 struct intel_svm_dev *sdev;
289 struct intel_svm *svm = NULL; 241 struct intel_svm *svm = NULL;
290 struct mm_struct *mm = NULL; 242 struct mm_struct *mm = NULL;
291 u64 pasid_entry_val;
292 int pasid_max; 243 int pasid_max;
293 int ret; 244 int ret;
294 245
@@ -397,24 +348,22 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
397 kfree(sdev); 348 kfree(sdev);
398 goto out; 349 goto out;
399 } 350 }
400 pasid_entry_val = (u64)__pa(mm->pgd) | PASID_ENTRY_P; 351 }
401 } else
402 pasid_entry_val = (u64)__pa(init_mm.pgd) |
403 PASID_ENTRY_P | PASID_ENTRY_SRE;
404 if (cpu_feature_enabled(X86_FEATURE_LA57))
405 pasid_entry_val |= PASID_ENTRY_FLPM_5LP;
406
407 entry = intel_pasid_get_entry(dev, svm->pasid);
408 entry->val = pasid_entry_val;
409
410 wmb();
411 352
412 /* 353 spin_lock(&iommu->lock);
413 * Flush PASID cache when a PASID table entry becomes 354 ret = intel_pasid_setup_first_level(iommu, dev,
414 * present. 355 mm ? mm->pgd : init_mm.pgd,
415 */ 356 svm->pasid, FLPT_DEFAULT_DID,
416 if (cap_caching_mode(iommu->cap)) 357 mm ? 0 : PASID_FLAG_SUPERVISOR_MODE);
417 intel_flush_pasid_dev(svm, sdev, svm->pasid); 358 spin_unlock(&iommu->lock);
359 if (ret) {
360 if (mm)
361 mmu_notifier_unregister(&svm->notifier, mm);
362 intel_pasid_free_id(svm->pasid);
363 kfree(svm);
364 kfree(sdev);
365 goto out;
366 }
418 367
419 list_add_tail(&svm->list, &global_svm_list); 368 list_add_tail(&svm->list, &global_svm_list);
420 } 369 }
@@ -460,10 +409,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
460 * to use. We have a *shared* PASID table, because it's 409 * to use. We have a *shared* PASID table, because it's
461 * large and has to be physically contiguous. So it's 410 * large and has to be physically contiguous. So it's
462 * hard to be as defensive as we might like. */ 411 * hard to be as defensive as we might like. */
463 intel_flush_pasid_dev(svm, sdev, svm->pasid); 412 intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
464 intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); 413 intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
465 kfree_rcu(sdev, rcu); 414 kfree_rcu(sdev, rcu);
466 intel_pasid_clear_entry(dev, svm->pasid);
467 415
468 if (list_empty(&svm->devs)) { 416 if (list_empty(&svm->devs)) {
469 intel_pasid_free_id(svm->pasid); 417 intel_pasid_free_id(svm->pasid);
@@ -671,24 +619,27 @@ static irqreturn_t prq_event_thread(int irq, void *d)
671 no_pasid: 619 no_pasid:
672 if (req->lpig) { 620 if (req->lpig) {
673 /* Page Group Response */ 621 /* Page Group Response */
674 resp.low = QI_PGRP_PASID(req->pasid) | 622 resp.qw0 = QI_PGRP_PASID(req->pasid) |
675 QI_PGRP_DID((req->bus << 8) | req->devfn) | 623 QI_PGRP_DID((req->bus << 8) | req->devfn) |
676 QI_PGRP_PASID_P(req->pasid_present) | 624 QI_PGRP_PASID_P(req->pasid_present) |
677 QI_PGRP_RESP_TYPE; 625 QI_PGRP_RESP_TYPE;
678 resp.high = QI_PGRP_IDX(req->prg_index) | 626 resp.qw1 = QI_PGRP_IDX(req->prg_index) |
679 QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result); 627 QI_PGRP_PRIV(req->private) |
680 628 QI_PGRP_RESP_CODE(result);
681 qi_submit_sync(&resp, iommu);
682 } else if (req->srr) { 629 } else if (req->srr) {
683 /* Page Stream Response */ 630 /* Page Stream Response */
684 resp.low = QI_PSTRM_IDX(req->prg_index) | 631 resp.qw0 = QI_PSTRM_IDX(req->prg_index) |
685 QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) | 632 QI_PSTRM_PRIV(req->private) |
686 QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE; 633 QI_PSTRM_BUS(req->bus) |
687 resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) | 634 QI_PSTRM_PASID(req->pasid) |
635 QI_PSTRM_RESP_TYPE;
636 resp.qw1 = QI_PSTRM_ADDR(address) |
637 QI_PSTRM_DEVFN(req->devfn) |
688 QI_PSTRM_RESP_CODE(result); 638 QI_PSTRM_RESP_CODE(result);
689
690 qi_submit_sync(&resp, iommu);
691 } 639 }
640 resp.qw2 = 0;
641 resp.qw3 = 0;
642 qi_submit_sync(&resp, iommu);
692 643
693 head = (head + sizeof(*req)) & PRQ_RING_MASK; 644 head = (head + sizeof(*req)) & PRQ_RING_MASK;
694 } 645 }
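
End of the intel-svm.c conversion. The mechanical change running through this file (and through intel-pasid.c and intel_irq_remapping.c above) is that struct qi_desc grows from two to four 64-bit words, so legacy 128-bit commands now have to clear qw2/qw3 explicitly. A minimal sketch of the two layouts and of that pattern; build_legacy_desc() is an invented name, not a kernel helper:

#include <stdint.h>
#include <stdio.h>

/* Old layout: one 128-bit descriptor. */
struct qi_desc_old {
	uint64_t low, high;
};

/* New layout: one 256-bit descriptor; legacy commands leave qw2/qw3 zero. */
struct qi_desc {
	uint64_t qw0, qw1, qw2, qw3;
};

/*
 * The pattern repeated throughout this file: qw0/qw1 are built exactly as
 * 'low'/'high' were before, and qw2/qw3 are cleared explicitly so that no
 * stack garbage reaches the hardware invalidation queue.
 */
static void build_legacy_desc(struct qi_desc *desc, uint64_t qw0, uint64_t qw1)
{
	desc->qw0 = qw0;
	desc->qw1 = qw1;
	desc->qw2 = 0;
	desc->qw3 = 0;
}

int main(void)
{
	struct qi_desc desc;

	build_legacy_desc(&desc, 0x1234, 0);
	printf("sizeof(old)=%zu sizeof(new)=%zu qw3=%llu\n",
	       sizeof(struct qi_desc_old), sizeof(struct qi_desc),
	       (unsigned long long)desc.qw3);
	return 0;
}
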
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index c2d6c11431de..24d45b07f425 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -145,9 +145,11 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
145{ 145{
146 struct qi_desc desc; 146 struct qi_desc desc;
147 147
148 desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) 148 desc.qw0 = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
149 | QI_IEC_SELECTIVE; 149 | QI_IEC_SELECTIVE;
150 desc.high = 0; 150 desc.qw1 = 0;
151 desc.qw2 = 0;
152 desc.qw3 = 0;
151 153
152 return qi_submit_sync(&desc, iommu); 154 return qi_submit_sync(&desc, iommu);
153} 155}
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 445c3bde0480..cec29bf45c9b 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -709,10 +709,6 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
709{ 709{
710 struct arm_v7s_io_pgtable *data; 710 struct arm_v7s_io_pgtable *data;
711 711
712#ifdef PHYS_OFFSET
713 if (upper_32_bits(PHYS_OFFSET))
714 return NULL;
715#endif
716 if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS) 712 if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
717 return NULL; 713 return NULL;
718 714
diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c
index 36d1a7ce7fc4..44127d54e943 100644
--- a/drivers/iommu/iommu-sysfs.c
+++ b/drivers/iommu/iommu-sysfs.c
@@ -11,7 +11,7 @@
11 11
12#include <linux/device.h> 12#include <linux/device.h>
13#include <linux/iommu.h> 13#include <linux/iommu.h>
14#include <linux/module.h> 14#include <linux/init.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16 16
17/* 17/*
@@ -22,25 +22,25 @@ static struct attribute *devices_attr[] = {
22 NULL, 22 NULL,
23}; 23};
24 24
25static const struct attribute_group iommu_devices_attr_group = { 25static const struct attribute_group devices_attr_group = {
26 .name = "devices", 26 .name = "devices",
27 .attrs = devices_attr, 27 .attrs = devices_attr,
28}; 28};
29 29
30static const struct attribute_group *iommu_dev_groups[] = { 30static const struct attribute_group *dev_groups[] = {
31 &iommu_devices_attr_group, 31 &devices_attr_group,
32 NULL, 32 NULL,
33}; 33};
34 34
35static void iommu_release_device(struct device *dev) 35static void release_device(struct device *dev)
36{ 36{
37 kfree(dev); 37 kfree(dev);
38} 38}
39 39
40static struct class iommu_class = { 40static struct class iommu_class = {
41 .name = "iommu", 41 .name = "iommu",
42 .dev_release = iommu_release_device, 42 .dev_release = release_device,
43 .dev_groups = iommu_dev_groups, 43 .dev_groups = dev_groups,
44}; 44};
45 45
46static int __init iommu_dev_init(void) 46static int __init iommu_dev_init(void)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index edbdf5d6962c..3ed4db334341 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -22,7 +22,8 @@
22#include <linux/kernel.h> 22#include <linux/kernel.h>
23#include <linux/bug.h> 23#include <linux/bug.h>
24#include <linux/types.h> 24#include <linux/types.h>
25#include <linux/module.h> 25#include <linux/init.h>
26#include <linux/export.h>
26#include <linux/slab.h> 27#include <linux/slab.h>
27#include <linux/errno.h> 28#include <linux/errno.h>
28#include <linux/iommu.h> 29#include <linux/iommu.h>
@@ -110,6 +111,27 @@ void iommu_device_unregister(struct iommu_device *iommu)
110 spin_unlock(&iommu_device_lock); 111 spin_unlock(&iommu_device_lock);
111} 112}
112 113
114int iommu_probe_device(struct device *dev)
115{
116 const struct iommu_ops *ops = dev->bus->iommu_ops;
117 int ret = -EINVAL;
118
119 WARN_ON(dev->iommu_group);
120
121 if (ops)
122 ret = ops->add_device(dev);
123
124 return ret;
125}
126
127void iommu_release_device(struct device *dev)
128{
129 const struct iommu_ops *ops = dev->bus->iommu_ops;
130
131 if (dev->iommu_group)
132 ops->remove_device(dev);
133}
134
113static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, 135static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
114 unsigned type); 136 unsigned type);
115static int __iommu_attach_device(struct iommu_domain *domain, 137static int __iommu_attach_device(struct iommu_domain *domain,
@@ -1117,16 +1139,7 @@ struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
1117 1139
1118static int add_iommu_group(struct device *dev, void *data) 1140static int add_iommu_group(struct device *dev, void *data)
1119{ 1141{
1120 struct iommu_callback_data *cb = data; 1142 int ret = iommu_probe_device(dev);
1121 const struct iommu_ops *ops = cb->ops;
1122 int ret;
1123
1124 if (!ops->add_device)
1125 return 0;
1126
1127 WARN_ON(dev->iommu_group);
1128
1129 ret = ops->add_device(dev);
1130 1143
1131 /* 1144 /*
1132 * We ignore -ENODEV errors for now, as they just mean that the 1145 * We ignore -ENODEV errors for now, as they just mean that the
@@ -1141,11 +1154,7 @@ static int add_iommu_group(struct device *dev, void *data)
1141 1154
1142static int remove_iommu_group(struct device *dev, void *data) 1155static int remove_iommu_group(struct device *dev, void *data)
1143{ 1156{
1144 struct iommu_callback_data *cb = data; 1157 iommu_release_device(dev);
1145 const struct iommu_ops *ops = cb->ops;
1146
1147 if (ops->remove_device && dev->iommu_group)
1148 ops->remove_device(dev);
1149 1158
1150 return 0; 1159 return 0;
1151} 1160}
@@ -1153,27 +1162,22 @@ static int remove_iommu_group(struct device *dev, void *data)
1153static int iommu_bus_notifier(struct notifier_block *nb, 1162static int iommu_bus_notifier(struct notifier_block *nb,
1154 unsigned long action, void *data) 1163 unsigned long action, void *data)
1155{ 1164{
1165 unsigned long group_action = 0;
1156 struct device *dev = data; 1166 struct device *dev = data;
1157 const struct iommu_ops *ops = dev->bus->iommu_ops;
1158 struct iommu_group *group; 1167 struct iommu_group *group;
1159 unsigned long group_action = 0;
1160 1168
1161 /* 1169 /*
1162 * ADD/DEL call into iommu driver ops if provided, which may 1170 * ADD/DEL call into iommu driver ops if provided, which may
1163 * result in ADD/DEL notifiers to group->notifier 1171 * result in ADD/DEL notifiers to group->notifier
1164 */ 1172 */
1165 if (action == BUS_NOTIFY_ADD_DEVICE) { 1173 if (action == BUS_NOTIFY_ADD_DEVICE) {
1166 if (ops->add_device) { 1174 int ret;
1167 int ret;
1168 1175
1169 ret = ops->add_device(dev); 1176 ret = iommu_probe_device(dev);
1170 return (ret) ? NOTIFY_DONE : NOTIFY_OK; 1177 return (ret) ? NOTIFY_DONE : NOTIFY_OK;
1171 }
1172 } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { 1178 } else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
1173 if (ops->remove_device && dev->iommu_group) { 1179 iommu_release_device(dev);
1174 ops->remove_device(dev); 1180 return NOTIFY_OK;
1175 return 0;
1176 }
1177 } 1181 }
1178 1182
1179 /* 1183 /*
@@ -1712,33 +1716,32 @@ EXPORT_SYMBOL_GPL(iommu_unmap_fast);
1712size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 1716size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
1713 struct scatterlist *sg, unsigned int nents, int prot) 1717 struct scatterlist *sg, unsigned int nents, int prot)
1714{ 1718{
1715 struct scatterlist *s; 1719 size_t len = 0, mapped = 0;
1716 size_t mapped = 0; 1720 phys_addr_t start;
1717 unsigned int i, min_pagesz; 1721 unsigned int i = 0;
1718 int ret; 1722 int ret;
1719 1723
1720 if (unlikely(domain->pgsize_bitmap == 0UL)) 1724 while (i <= nents) {
1721 return 0; 1725 phys_addr_t s_phys = sg_phys(sg);
1722 1726
1723 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 1727 if (len && s_phys != start + len) {
1724 1728 ret = iommu_map(domain, iova + mapped, start, len, prot);
1725 for_each_sg(sg, s, nents, i) { 1729 if (ret)
1726 phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset; 1730 goto out_err;
1727 1731
1728 /* 1732 mapped += len;
1729 * We are mapping on IOMMU page boundaries, so offset within 1733 len = 0;
1730 * the page must be 0. However, the IOMMU may support pages 1734 }
1731 * smaller than PAGE_SIZE, so s->offset may still represent
1732 * an offset of that boundary within the CPU page.
1733 */
1734 if (!IS_ALIGNED(s->offset, min_pagesz))
1735 goto out_err;
1736 1735
1737 ret = iommu_map(domain, iova + mapped, phys, s->length, prot); 1736 if (len) {
1738 if (ret) 1737 len += sg->length;
1739 goto out_err; 1738 } else {
1739 len = sg->length;
1740 start = s_phys;
1741 }
1740 1742
1741 mapped += s->length; 1743 if (++i < nents)
1744 sg = sg_next(sg);
1742 } 1745 }
1743 1746
1744 return mapped; 1747 return mapped;
@@ -1976,7 +1979,7 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
1976int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, 1979int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
1977 const struct iommu_ops *ops) 1980 const struct iommu_ops *ops)
1978{ 1981{
1979 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 1982 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1980 1983
1981 if (fwspec) 1984 if (fwspec)
1982 return ops == fwspec->ops ? 0 : -EINVAL; 1985 return ops == fwspec->ops ? 0 : -EINVAL;
@@ -1988,26 +1991,26 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
1988 of_node_get(to_of_node(iommu_fwnode)); 1991 of_node_get(to_of_node(iommu_fwnode));
1989 fwspec->iommu_fwnode = iommu_fwnode; 1992 fwspec->iommu_fwnode = iommu_fwnode;
1990 fwspec->ops = ops; 1993 fwspec->ops = ops;
1991 dev->iommu_fwspec = fwspec; 1994 dev_iommu_fwspec_set(dev, fwspec);
1992 return 0; 1995 return 0;
1993} 1996}
1994EXPORT_SYMBOL_GPL(iommu_fwspec_init); 1997EXPORT_SYMBOL_GPL(iommu_fwspec_init);
1995 1998
1996void iommu_fwspec_free(struct device *dev) 1999void iommu_fwspec_free(struct device *dev)
1997{ 2000{
1998 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 2001 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1999 2002
2000 if (fwspec) { 2003 if (fwspec) {
2001 fwnode_handle_put(fwspec->iommu_fwnode); 2004 fwnode_handle_put(fwspec->iommu_fwnode);
2002 kfree(fwspec); 2005 kfree(fwspec);
2003 dev->iommu_fwspec = NULL; 2006 dev_iommu_fwspec_set(dev, NULL);
2004 } 2007 }
2005} 2008}
2006EXPORT_SYMBOL_GPL(iommu_fwspec_free); 2009EXPORT_SYMBOL_GPL(iommu_fwspec_free);
2007 2010
2008int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) 2011int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
2009{ 2012{
2010 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 2013 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2011 size_t size; 2014 size_t size;
2012 int i; 2015 int i;
2013 2016
@@ -2016,11 +2019,11 @@ int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
2016 2019
2017 size = offsetof(struct iommu_fwspec, ids[fwspec->num_ids + num_ids]); 2020 size = offsetof(struct iommu_fwspec, ids[fwspec->num_ids + num_ids]);
2018 if (size > sizeof(*fwspec)) { 2021 if (size > sizeof(*fwspec)) {
2019 fwspec = krealloc(dev->iommu_fwspec, size, GFP_KERNEL); 2022 fwspec = krealloc(fwspec, size, GFP_KERNEL);
2020 if (!fwspec) 2023 if (!fwspec)
2021 return -ENOMEM; 2024 return -ENOMEM;
2022 2025
2023 dev->iommu_fwspec = fwspec; 2026 dev_iommu_fwspec_set(dev, fwspec);
2024 } 2027 }
2025 2028
2026 for (i = 0; i < num_ids; i++) 2029 for (i = 0; i < num_ids; i++)
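
End of the iommu.c changes. The rewritten iommu_map_sg() no longer calls iommu_map() once per scatterlist element; it accumulates runs of physically contiguous elements and maps each run in one go. A self-contained model of that coalescing loop, with plain arrays standing in for struct scatterlist and a printf() in place of iommu_map(); error handling is omitted and the names are illustrative:

#include <stdio.h>
#include <stddef.h>

struct seg {
	unsigned long phys;
	size_t length;
};

/* Merge physically contiguous segments and "map" one run at a time. */
static size_t map_sg_model(unsigned long iova, const struct seg *sg,
			   unsigned int nents)
{
	size_t len = 0, mapped = 0;
	unsigned long start = 0;
	unsigned int i = 0;

	while (i <= nents) {
		unsigned long s_phys = (i < nents) ? sg[i].phys : 0;

		/* Flush the current run once it can no longer be extended. */
		if (len && (i == nents || s_phys != start + len)) {
			printf("map: iova %#lx -> phys %#lx, %zu bytes\n",
			       (unsigned long)(iova + mapped), start, len);
			mapped += len;
			len = 0;
		}

		if (i == nents)
			break;

		if (len) {
			len += sg[i].length;
		} else {
			start = s_phys;
			len = sg[i].length;
		}
		i++;
	}

	return mapped;
}

int main(void)
{
	/* First two segments are physically contiguous, the third is not. */
	const struct seg sg[] = {
		{ 0x100000, 0x1000 },
		{ 0x101000, 0x2000 },
		{ 0x200000, 0x1000 },
	};

	printf("total mapped: %zu bytes\n",
	       map_sg_model(0x80000000UL, sg, 3));
	return 0;
}

With the sample segments above, the first two are merged into a single 12 KiB mapping and the third becomes its own 4 KiB mapping, 16 KiB in total.
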
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index ddf3a492e1d5..7a4529c61c19 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1,6 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 * IPMMU VMSA 3 * IOMMU API for Renesas VMSA-compatible IPMMU
4 * Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
4 * 5 *
5 * Copyright (C) 2014 Renesas Electronics Corporation 6 * Copyright (C) 2014 Renesas Electronics Corporation
6 */ 7 */
@@ -11,10 +12,10 @@
11#include <linux/dma-mapping.h> 12#include <linux/dma-mapping.h>
12#include <linux/err.h> 13#include <linux/err.h>
13#include <linux/export.h> 14#include <linux/export.h>
15#include <linux/init.h>
14#include <linux/interrupt.h> 16#include <linux/interrupt.h>
15#include <linux/io.h> 17#include <linux/io.h>
16#include <linux/iommu.h> 18#include <linux/iommu.h>
17#include <linux/module.h>
18#include <linux/of.h> 19#include <linux/of.h>
19#include <linux/of_device.h> 20#include <linux/of_device.h>
20#include <linux/of_iommu.h> 21#include <linux/of_iommu.h>
@@ -81,7 +82,9 @@ static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom)
81 82
82static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev) 83static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev)
83{ 84{
84 return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL; 85 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
86
87 return fwspec ? fwspec->iommu_priv : NULL;
85} 88}
86 89
87#define TLB_LOOP_TIMEOUT 100 /* 100us */ 90#define TLB_LOOP_TIMEOUT 100 /* 100us */
@@ -643,7 +646,7 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain)
643static int ipmmu_attach_device(struct iommu_domain *io_domain, 646static int ipmmu_attach_device(struct iommu_domain *io_domain,
644 struct device *dev) 647 struct device *dev)
645{ 648{
646 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 649 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
647 struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); 650 struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
648 struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); 651 struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
649 unsigned int i; 652 unsigned int i;
@@ -692,7 +695,7 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
692static void ipmmu_detach_device(struct iommu_domain *io_domain, 695static void ipmmu_detach_device(struct iommu_domain *io_domain,
693 struct device *dev) 696 struct device *dev)
694{ 697{
695 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 698 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
696 struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); 699 struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
697 unsigned int i; 700 unsigned int i;
698 701
@@ -744,36 +747,71 @@ static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain,
744static int ipmmu_init_platform_device(struct device *dev, 747static int ipmmu_init_platform_device(struct device *dev,
745 struct of_phandle_args *args) 748 struct of_phandle_args *args)
746{ 749{
750 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
747 struct platform_device *ipmmu_pdev; 751 struct platform_device *ipmmu_pdev;
748 752
749 ipmmu_pdev = of_find_device_by_node(args->np); 753 ipmmu_pdev = of_find_device_by_node(args->np);
750 if (!ipmmu_pdev) 754 if (!ipmmu_pdev)
751 return -ENODEV; 755 return -ENODEV;
752 756
753 dev->iommu_fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev); 757 fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev);
754 return 0;
755}
756 758
757static bool ipmmu_slave_whitelist(struct device *dev) 759 return 0;
758{
759 /* By default, do not allow use of IPMMU */
760 return false;
761} 760}
762 761
763static const struct soc_device_attribute soc_rcar_gen3[] = { 762static const struct soc_device_attribute soc_rcar_gen3[] = {
763 { .soc_id = "r8a774a1", },
764 { .soc_id = "r8a774c0", },
764 { .soc_id = "r8a7795", }, 765 { .soc_id = "r8a7795", },
765 { .soc_id = "r8a7796", }, 766 { .soc_id = "r8a7796", },
766 { .soc_id = "r8a77965", }, 767 { .soc_id = "r8a77965", },
767 { .soc_id = "r8a77970", }, 768 { .soc_id = "r8a77970", },
769 { .soc_id = "r8a77990", },
770 { .soc_id = "r8a77995", },
771 { /* sentinel */ }
772};
773
774static const struct soc_device_attribute soc_rcar_gen3_whitelist[] = {
775 { .soc_id = "r8a774c0", },
776 { .soc_id = "r8a7795", .revision = "ES3.*" },
777 { .soc_id = "r8a77965", },
778 { .soc_id = "r8a77990", },
768 { .soc_id = "r8a77995", }, 779 { .soc_id = "r8a77995", },
769 { /* sentinel */ } 780 { /* sentinel */ }
770}; 781};
771 782
783static const char * const rcar_gen3_slave_whitelist[] = {
784};
785
786static bool ipmmu_slave_whitelist(struct device *dev)
787{
788 unsigned int i;
789
790 /*
791 * For R-Car Gen3 use a white list to opt-in slave devices.
792 * For other SoCs, this returns true anyway.
793 */
794 if (!soc_device_match(soc_rcar_gen3))
795 return true;
796
797 /* Check whether this R-Car Gen3 can use the IPMMU correctly or not */
798 if (!soc_device_match(soc_rcar_gen3_whitelist))
799 return false;
800
801 /* Check whether this slave device can work with the IPMMU */
802 for (i = 0; i < ARRAY_SIZE(rcar_gen3_slave_whitelist); i++) {
803 if (!strcmp(dev_name(dev), rcar_gen3_slave_whitelist[i]))
804 return true;
805 }
806
807 /* Otherwise, do not allow use of IPMMU */
808 return false;
809}
810
772static int ipmmu_of_xlate(struct device *dev, 811static int ipmmu_of_xlate(struct device *dev,
773 struct of_phandle_args *spec) 812 struct of_phandle_args *spec)
774{ 813{
775 /* For R-Car Gen3 use a white list to opt-in slave devices */ 814 if (!ipmmu_slave_whitelist(dev))
776 if (soc_device_match(soc_rcar_gen3) && !ipmmu_slave_whitelist(dev))
777 return -ENODEV; 815 return -ENODEV;
778 816
779 iommu_fwspec_add_ids(dev, spec->args, 1); 817 iommu_fwspec_add_ids(dev, spec->args, 1);
@@ -941,6 +979,12 @@ static const struct of_device_id ipmmu_of_ids[] = {
941 .compatible = "renesas,ipmmu-vmsa", 979 .compatible = "renesas,ipmmu-vmsa",
942 .data = &ipmmu_features_default, 980 .data = &ipmmu_features_default,
943 }, { 981 }, {
982 .compatible = "renesas,ipmmu-r8a774a1",
983 .data = &ipmmu_features_rcar_gen3,
984 }, {
985 .compatible = "renesas,ipmmu-r8a774c0",
986 .data = &ipmmu_features_rcar_gen3,
987 }, {
944 .compatible = "renesas,ipmmu-r8a7795", 988 .compatible = "renesas,ipmmu-r8a7795",
945 .data = &ipmmu_features_rcar_gen3, 989 .data = &ipmmu_features_rcar_gen3,
946 }, { 990 }, {
@@ -953,6 +997,9 @@ static const struct of_device_id ipmmu_of_ids[] = {
953 .compatible = "renesas,ipmmu-r8a77970", 997 .compatible = "renesas,ipmmu-r8a77970",
954 .data = &ipmmu_features_rcar_gen3, 998 .data = &ipmmu_features_rcar_gen3,
955 }, { 999 }, {
1000 .compatible = "renesas,ipmmu-r8a77990",
1001 .data = &ipmmu_features_rcar_gen3,
1002 }, {
956 .compatible = "renesas,ipmmu-r8a77995", 1003 .compatible = "renesas,ipmmu-r8a77995",
957 .data = &ipmmu_features_rcar_gen3, 1004 .data = &ipmmu_features_rcar_gen3,
958 }, { 1005 }, {
@@ -960,8 +1007,6 @@ static const struct of_device_id ipmmu_of_ids[] = {
960 }, 1007 },
961}; 1008};
962 1009
963MODULE_DEVICE_TABLE(of, ipmmu_of_ids);
964
965static int ipmmu_probe(struct platform_device *pdev) 1010static int ipmmu_probe(struct platform_device *pdev)
966{ 1011{
967 struct ipmmu_vmsa_device *mmu; 1012 struct ipmmu_vmsa_device *mmu;
@@ -1132,15 +1177,4 @@ static int __init ipmmu_init(void)
1132 setup_done = true; 1177 setup_done = true;
1133 return 0; 1178 return 0;
1134} 1179}
1135
1136static void __exit ipmmu_exit(void)
1137{
1138 return platform_driver_unregister(&ipmmu_driver);
1139}
1140
1141subsys_initcall(ipmmu_init); 1180subsys_initcall(ipmmu_init);
1142module_exit(ipmmu_exit);
1143
1144MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU");
1145MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
1146MODULE_LICENSE("GPL v2");
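For reference, the new rcar_gen3_slave_whitelist[] above is deliberately left empty, so even the whitelisted SoC revisions still reject every slave device; opting a device in later is a one-line change. A minimal sketch of what such an entry would look like (the device name below is hypothetical, not taken from this series):

static const char * const rcar_gen3_slave_whitelist[] = {
	"e7300000.dma-controller",	/* hypothetical slave, for illustration only */
};

ipmmu_slave_whitelist() then matches dev_name(dev) against these strings before ipmmu_of_xlate() is allowed to proceed.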
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 7d0f3074d41d..b94ebd42edd8 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -1,4 +1,3 @@
1#include <linux/seq_file.h>
2#include <linux/cpumask.h> 1#include <linux/cpumask.h>
3#include <linux/kernel.h> 2#include <linux/kernel.h>
4#include <linux/string.h> 3#include <linux/string.h>
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index fc5f0b53adaf..fc4270733f11 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -1,5 +1,7 @@
1/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. 1/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
2 * 2 *
3 * Author: Stepan Moskovchenko <stepanm@codeaurora.org>
4 *
3 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and 6 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation. 7 * only version 2 as published by the Free Software Foundation.
@@ -17,7 +19,7 @@
17 19
18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19#include <linux/kernel.h> 21#include <linux/kernel.h>
20#include <linux/module.h> 22#include <linux/init.h>
21#include <linux/platform_device.h> 23#include <linux/platform_device.h>
22#include <linux/errno.h> 24#include <linux/errno.h>
23#include <linux/io.h> 25#include <linux/io.h>
@@ -861,14 +863,5 @@ static int __init msm_iommu_driver_init(void)
861 863
862 return ret; 864 return ret;
863} 865}
864
865static void __exit msm_iommu_driver_exit(void)
866{
867 platform_driver_unregister(&msm_iommu_driver);
868}
869
870subsys_initcall(msm_iommu_driver_init); 866subsys_initcall(msm_iommu_driver_init);
871module_exit(msm_iommu_driver_exit);
872 867
873MODULE_LICENSE("GPL v2");
874MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>");
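The msm_iommu.c hunk follows the same builtin-only conversion used throughout this series: <linux/module.h> becomes <linux/init.h>, the __exit path and MODULE_* metadata go away, and only the registration initcall remains. A generic sketch of the resulting shape (placeholder names, not the real driver):

#include <linux/init.h>
#include <linux/platform_device.h>

static struct platform_driver example_iommu_driver = {
	.driver = {
		.name = "example-iommu",	/* placeholder */
	},
};

static int __init example_iommu_init(void)
{
	return platform_driver_register(&example_iommu_driver);
}
subsys_initcall(example_iommu_init);	/* no module_exit(): the driver is builtin-only */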
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 44bd5b9166bb..de3e02277b70 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -113,7 +113,7 @@ struct mtk_iommu_domain {
113 struct iommu_domain domain; 113 struct iommu_domain domain;
114}; 114};
115 115
116static struct iommu_ops mtk_iommu_ops; 116static const struct iommu_ops mtk_iommu_ops;
117 117
118static LIST_HEAD(m4ulist); /* List all the M4U HWs */ 118static LIST_HEAD(m4ulist); /* List all the M4U HWs */
119 119
@@ -244,7 +244,7 @@ static void mtk_iommu_config(struct mtk_iommu_data *data,
244{ 244{
245 struct mtk_smi_larb_iommu *larb_mmu; 245 struct mtk_smi_larb_iommu *larb_mmu;
246 unsigned int larbid, portid; 246 unsigned int larbid, portid;
247 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 247 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
248 int i; 248 int i;
249 249
250 for (i = 0; i < fwspec->num_ids; ++i) { 250 for (i = 0; i < fwspec->num_ids; ++i) {
@@ -336,7 +336,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
336 struct device *dev) 336 struct device *dev)
337{ 337{
338 struct mtk_iommu_domain *dom = to_mtk_domain(domain); 338 struct mtk_iommu_domain *dom = to_mtk_domain(domain);
339 struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv; 339 struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
340 340
341 if (!data) 341 if (!data)
342 return -ENODEV; 342 return -ENODEV;
@@ -355,7 +355,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
355static void mtk_iommu_detach_device(struct iommu_domain *domain, 355static void mtk_iommu_detach_device(struct iommu_domain *domain,
356 struct device *dev) 356 struct device *dev)
357{ 357{
358 struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv; 358 struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
359 359
360 if (!data) 360 if (!data)
361 return; 361 return;
@@ -417,13 +417,14 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
417 417
418static int mtk_iommu_add_device(struct device *dev) 418static int mtk_iommu_add_device(struct device *dev)
419{ 419{
420 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
420 struct mtk_iommu_data *data; 421 struct mtk_iommu_data *data;
421 struct iommu_group *group; 422 struct iommu_group *group;
422 423
423 if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops) 424 if (!fwspec || fwspec->ops != &mtk_iommu_ops)
424	return -ENODEV; /* Not an iommu client device */ 425	return -ENODEV; /* Not an iommu client device */
425 426
426 data = dev->iommu_fwspec->iommu_priv; 427 data = fwspec->iommu_priv;
427 iommu_device_link(&data->iommu, dev); 428 iommu_device_link(&data->iommu, dev);
428 429
429 group = iommu_group_get_for_dev(dev); 430 group = iommu_group_get_for_dev(dev);
@@ -436,12 +437,13 @@ static int mtk_iommu_add_device(struct device *dev)
436 437
437static void mtk_iommu_remove_device(struct device *dev) 438static void mtk_iommu_remove_device(struct device *dev)
438{ 439{
440 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
439 struct mtk_iommu_data *data; 441 struct mtk_iommu_data *data;
440 442
441 if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops) 443 if (!fwspec || fwspec->ops != &mtk_iommu_ops)
442 return; 444 return;
443 445
444 data = dev->iommu_fwspec->iommu_priv; 446 data = fwspec->iommu_priv;
445 iommu_device_unlink(&data->iommu, dev); 447 iommu_device_unlink(&data->iommu, dev);
446 448
447 iommu_group_remove_device(dev); 449 iommu_group_remove_device(dev);
@@ -468,6 +470,7 @@ static struct iommu_group *mtk_iommu_device_group(struct device *dev)
468 470
469static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) 471static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
470{ 472{
473 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
471 struct platform_device *m4updev; 474 struct platform_device *m4updev;
472 475
473 if (args->args_count != 1) { 476 if (args->args_count != 1) {
@@ -476,19 +479,19 @@ static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
476 return -EINVAL; 479 return -EINVAL;
477 } 480 }
478 481
479 if (!dev->iommu_fwspec->iommu_priv) { 482 if (!fwspec->iommu_priv) {
480 /* Get the m4u device */ 483 /* Get the m4u device */
481 m4updev = of_find_device_by_node(args->np); 484 m4updev = of_find_device_by_node(args->np);
482 if (WARN_ON(!m4updev)) 485 if (WARN_ON(!m4updev))
483 return -EINVAL; 486 return -EINVAL;
484 487
485 dev->iommu_fwspec->iommu_priv = platform_get_drvdata(m4updev); 488 fwspec->iommu_priv = platform_get_drvdata(m4updev);
486 } 489 }
487 490
488 return iommu_fwspec_add_ids(dev, args->args, 1); 491 return iommu_fwspec_add_ids(dev, args->args, 1);
489} 492}
490 493
491static struct iommu_ops mtk_iommu_ops = { 494static const struct iommu_ops mtk_iommu_ops = {
492 .domain_alloc = mtk_iommu_domain_alloc, 495 .domain_alloc = mtk_iommu_domain_alloc,
493 .domain_free = mtk_iommu_domain_free, 496 .domain_free = mtk_iommu_domain_free,
494 .attach_dev = mtk_iommu_attach_device, 497 .attach_dev = mtk_iommu_attach_device,
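The common thread in the mtk_iommu.c changes (and in the drivers that follow) is reading the fwspec once through dev_iommu_fwspec_get() instead of dereferencing dev->iommu_fwspec directly, then checking ownership before touching iommu_priv. A condensed sketch of the pattern, loosely modelled on mtk_iommu_add_device() rather than copied from it:

static int example_add_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct mtk_iommu_data *data;

	if (!fwspec || fwspec->ops != &mtk_iommu_ops)
		return -ENODEV;		/* not a client of this IOMMU */

	data = fwspec->iommu_priv;	/* stashed by the driver at of_xlate time */
	iommu_device_link(&data->iommu, dev);

	return 0;
}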
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 0e780848f59b..6ede4286b835 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -1,4 +1,6 @@
1/* 1/*
2 * IOMMU API for MTK architected m4u v1 implementations
3 *
2 * Copyright (c) 2015-2016 MediaTek Inc. 4 * Copyright (c) 2015-2016 MediaTek Inc.
3 * Author: Honghui Zhang <honghui.zhang@mediatek.com> 5 * Author: Honghui Zhang <honghui.zhang@mediatek.com>
4 * 6 *
@@ -35,7 +37,7 @@
35#include <linux/spinlock.h> 37#include <linux/spinlock.h>
36#include <asm/barrier.h> 38#include <asm/barrier.h>
37#include <asm/dma-iommu.h> 39#include <asm/dma-iommu.h>
38#include <linux/module.h> 40#include <linux/init.h>
39#include <dt-bindings/memory/mt2701-larb-port.h> 41#include <dt-bindings/memory/mt2701-larb-port.h>
40#include <soc/mediatek/smi.h> 42#include <soc/mediatek/smi.h>
41#include "mtk_iommu.h" 43#include "mtk_iommu.h"
@@ -206,7 +208,7 @@ static void mtk_iommu_config(struct mtk_iommu_data *data,
206{ 208{
207 struct mtk_smi_larb_iommu *larb_mmu; 209 struct mtk_smi_larb_iommu *larb_mmu;
208 unsigned int larbid, portid; 210 unsigned int larbid, portid;
209 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 211 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
210 int i; 212 int i;
211 213
212 for (i = 0; i < fwspec->num_ids; ++i) { 214 for (i = 0; i < fwspec->num_ids; ++i) {
@@ -271,7 +273,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
271 struct device *dev) 273 struct device *dev)
272{ 274{
273 struct mtk_iommu_domain *dom = to_mtk_domain(domain); 275 struct mtk_iommu_domain *dom = to_mtk_domain(domain);
274 struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv; 276 struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
275 int ret; 277 int ret;
276 278
277 if (!data) 279 if (!data)
@@ -293,7 +295,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
293static void mtk_iommu_detach_device(struct iommu_domain *domain, 295static void mtk_iommu_detach_device(struct iommu_domain *domain,
294 struct device *dev) 296 struct device *dev)
295{ 297{
296 struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv; 298 struct mtk_iommu_data *data = dev_iommu_fwspec_get(dev)->iommu_priv;
297 299
298 if (!data) 300 if (!data)
299 return; 301 return;
@@ -362,7 +364,7 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
362 return pa; 364 return pa;
363} 365}
364 366
365static struct iommu_ops mtk_iommu_ops; 367static const struct iommu_ops mtk_iommu_ops;
366 368
367/* 369/*
368 * MTK generation one iommu HW only support one iommu domain, and all the client 370 * MTK generation one iommu HW only support one iommu domain, and all the client
@@ -371,6 +373,7 @@ static struct iommu_ops mtk_iommu_ops;
371static int mtk_iommu_create_mapping(struct device *dev, 373static int mtk_iommu_create_mapping(struct device *dev,
372 struct of_phandle_args *args) 374 struct of_phandle_args *args)
373{ 375{
376 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
374 struct mtk_iommu_data *data; 377 struct mtk_iommu_data *data;
375 struct platform_device *m4updev; 378 struct platform_device *m4updev;
376 struct dma_iommu_mapping *mtk_mapping; 379 struct dma_iommu_mapping *mtk_mapping;
@@ -383,28 +386,29 @@ static int mtk_iommu_create_mapping(struct device *dev,
383 return -EINVAL; 386 return -EINVAL;
384 } 387 }
385 388
386 if (!dev->iommu_fwspec) { 389 if (!fwspec) {
387 ret = iommu_fwspec_init(dev, &args->np->fwnode, &mtk_iommu_ops); 390 ret = iommu_fwspec_init(dev, &args->np->fwnode, &mtk_iommu_ops);
388 if (ret) 391 if (ret)
389 return ret; 392 return ret;
390 } else if (dev->iommu_fwspec->ops != &mtk_iommu_ops) { 393 fwspec = dev_iommu_fwspec_get(dev);
394 } else if (dev_iommu_fwspec_get(dev)->ops != &mtk_iommu_ops) {
391 return -EINVAL; 395 return -EINVAL;
392 } 396 }
393 397
394 if (!dev->iommu_fwspec->iommu_priv) { 398 if (!fwspec->iommu_priv) {
395 /* Get the m4u device */ 399 /* Get the m4u device */
396 m4updev = of_find_device_by_node(args->np); 400 m4updev = of_find_device_by_node(args->np);
397 if (WARN_ON(!m4updev)) 401 if (WARN_ON(!m4updev))
398 return -EINVAL; 402 return -EINVAL;
399 403
400 dev->iommu_fwspec->iommu_priv = platform_get_drvdata(m4updev); 404 fwspec->iommu_priv = platform_get_drvdata(m4updev);
401 } 405 }
402 406
403 ret = iommu_fwspec_add_ids(dev, args->args, 1); 407 ret = iommu_fwspec_add_ids(dev, args->args, 1);
404 if (ret) 408 if (ret)
405 return ret; 409 return ret;
406 410
407 data = dev->iommu_fwspec->iommu_priv; 411 data = fwspec->iommu_priv;
408 m4udev = data->dev; 412 m4udev = data->dev;
409 mtk_mapping = m4udev->archdata.iommu; 413 mtk_mapping = m4udev->archdata.iommu;
410 if (!mtk_mapping) { 414 if (!mtk_mapping) {
@@ -422,6 +426,7 @@ static int mtk_iommu_create_mapping(struct device *dev,
422 426
423static int mtk_iommu_add_device(struct device *dev) 427static int mtk_iommu_add_device(struct device *dev)
424{ 428{
429 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
425 struct dma_iommu_mapping *mtk_mapping; 430 struct dma_iommu_mapping *mtk_mapping;
426 struct of_phandle_args iommu_spec; 431 struct of_phandle_args iommu_spec;
427 struct of_phandle_iterator it; 432 struct of_phandle_iterator it;
@@ -440,7 +445,7 @@ static int mtk_iommu_add_device(struct device *dev)
440 of_node_put(iommu_spec.np); 445 of_node_put(iommu_spec.np);
441 } 446 }
442 447
443 if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops) 448 if (!fwspec || fwspec->ops != &mtk_iommu_ops)
444	return -ENODEV; /* Not an iommu client device */ 449	return -ENODEV; /* Not an iommu client device */
445 450
446 /* 451 /*
@@ -458,7 +463,7 @@ static int mtk_iommu_add_device(struct device *dev)
458 if (err) 463 if (err)
459 return err; 464 return err;
460 465
461 data = dev->iommu_fwspec->iommu_priv; 466 data = fwspec->iommu_priv;
462 mtk_mapping = data->dev->archdata.iommu; 467 mtk_mapping = data->dev->archdata.iommu;
463 err = arm_iommu_attach_device(dev, mtk_mapping); 468 err = arm_iommu_attach_device(dev, mtk_mapping);
464 if (err) { 469 if (err) {
@@ -471,12 +476,13 @@ static int mtk_iommu_add_device(struct device *dev)
471 476
472static void mtk_iommu_remove_device(struct device *dev) 477static void mtk_iommu_remove_device(struct device *dev)
473{ 478{
479 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
474 struct mtk_iommu_data *data; 480 struct mtk_iommu_data *data;
475 481
476 if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops) 482 if (!fwspec || fwspec->ops != &mtk_iommu_ops)
477 return; 483 return;
478 484
479 data = dev->iommu_fwspec->iommu_priv; 485 data = fwspec->iommu_priv;
480 iommu_device_unlink(&data->iommu, dev); 486 iommu_device_unlink(&data->iommu, dev);
481 487
482 iommu_group_remove_device(dev); 488 iommu_group_remove_device(dev);
@@ -524,7 +530,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
524 return 0; 530 return 0;
525} 531}
526 532
527static struct iommu_ops mtk_iommu_ops = { 533static const struct iommu_ops mtk_iommu_ops = {
528 .domain_alloc = mtk_iommu_domain_alloc, 534 .domain_alloc = mtk_iommu_domain_alloc,
529 .domain_free = mtk_iommu_domain_free, 535 .domain_free = mtk_iommu_domain_free,
530 .attach_dev = mtk_iommu_attach_device, 536 .attach_dev = mtk_iommu_attach_device,
@@ -704,15 +710,4 @@ static int __init m4u_init(void)
704{ 710{
705 return platform_driver_register(&mtk_iommu_driver); 711 return platform_driver_register(&mtk_iommu_driver);
706} 712}
707
708static void __exit m4u_exit(void)
709{
710 return platform_driver_unregister(&mtk_iommu_driver);
711}
712
713subsys_initcall(m4u_init); 713subsys_initcall(m4u_init);
714module_exit(m4u_exit);
715
716MODULE_DESCRIPTION("IOMMU API for MTK architected m4u v1 implementations");
717MODULE_AUTHOR("Honghui Zhang <honghui.zhang@mediatek.com>");
718MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index c5dd63072529..d8947b28db2d 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -164,7 +164,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
164 struct device_node *master_np) 164 struct device_node *master_np)
165{ 165{
166 const struct iommu_ops *ops = NULL; 166 const struct iommu_ops *ops = NULL;
167 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 167 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
168 int err = NO_IOMMU; 168 int err = NO_IOMMU;
169 169
170 if (!master_np) 170 if (!master_np)
@@ -208,20 +208,24 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
208 } 208 }
209 } 209 }
210 210
211
211 /* 212 /*
212 * Two success conditions can be represented by non-negative err here: 213 * Two success conditions can be represented by non-negative err here:
213 * >0 : there is no IOMMU, or one was unavailable for non-fatal reasons 214 * >0 : there is no IOMMU, or one was unavailable for non-fatal reasons
214 * 0 : we found an IOMMU, and dev->fwspec is initialised appropriately 215 * 0 : we found an IOMMU, and dev->fwspec is initialised appropriately
215 * <0 : any actual error 216 * <0 : any actual error
216 */ 217 */
217 if (!err) 218 if (!err) {
218 ops = dev->iommu_fwspec->ops; 219 /* The fwspec pointer changed, read it again */
220 fwspec = dev_iommu_fwspec_get(dev);
221 ops = fwspec->ops;
222 }
219 /* 223 /*
220 * If we have reason to believe the IOMMU driver missed the initial 224 * If we have reason to believe the IOMMU driver missed the initial
221 * add_device callback for dev, replay it to get things in order. 225 * probe for dev, replay it to get things in order.
222 */ 226 */
223 if (ops && ops->add_device && dev->bus && !dev->iommu_group) 227 if (dev->bus && !device_iommu_mapped(dev))
224 err = ops->add_device(dev); 228 err = iommu_probe_device(dev);
225 229
226 /* Ignore all other errors apart from EPROBE_DEFER */ 230 /* Ignore all other errors apart from EPROBE_DEFER */
227 if (err == -EPROBE_DEFER) { 231 if (err == -EPROBE_DEFER) {
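For context, a rough sketch of how a caller is expected to treat of_iommu_configure()'s return value after this change; the helper below is illustrative only and is not taken from the kernel's DMA-configuration code:

static int example_of_configure(struct device *dev, struct device_node *np)
{
	const struct iommu_ops *ops = of_iommu_configure(dev, np);

	if (IS_ERR(ops))	/* in practice -EPROBE_DEFER: retry the probe later */
		return PTR_ERR(ops);

	/*
	 * NULL: no (usable) IOMMU, set up direct DMA.
	 * Non-NULL: the device has been handed to the IOMMU core via
	 * iommu_probe_device(), so device_iommu_mapped(dev) should now hold.
	 */
	dev_dbg(dev, "DMA is %stranslated\n", ops ? "" : "not ");
	return 0;
}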
diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c
index 50217548c3b8..4abc0ef522a8 100644
--- a/drivers/iommu/omap-iommu-debug.c
+++ b/drivers/iommu/omap-iommu-debug.c
@@ -159,7 +159,7 @@ static size_t omap_dump_tlb_entries(struct omap_iommu *obj, struct seq_file *s)
159 return 0; 159 return 0;
160} 160}
161 161
162static int debug_read_tlb(struct seq_file *s, void *data) 162static int tlb_show(struct seq_file *s, void *data)
163{ 163{
164 struct omap_iommu *obj = s->private; 164 struct omap_iommu *obj = s->private;
165 165
@@ -210,7 +210,7 @@ static void dump_ioptable(struct seq_file *s)
210 spin_unlock(&obj->page_table_lock); 210 spin_unlock(&obj->page_table_lock);
211} 211}
212 212
213static int debug_read_pagetable(struct seq_file *s, void *data) 213static int pagetable_show(struct seq_file *s, void *data)
214{ 214{
215 struct omap_iommu *obj = s->private; 215 struct omap_iommu *obj = s->private;
216 216
@@ -228,35 +228,22 @@ static int debug_read_pagetable(struct seq_file *s, void *data)
228 return 0; 228 return 0;
229} 229}
230 230
231#define DEBUG_SEQ_FOPS_RO(name) \
232 static int debug_open_##name(struct inode *inode, struct file *file) \
233 { \
234 return single_open(file, debug_read_##name, inode->i_private); \
235 } \
236 \
237 static const struct file_operations debug_##name##_fops = { \
238 .open = debug_open_##name, \
239 .read = seq_read, \
240 .llseek = seq_lseek, \
241 .release = single_release, \
242 }
243
244#define DEBUG_FOPS_RO(name) \ 231#define DEBUG_FOPS_RO(name) \
245 static const struct file_operations debug_##name##_fops = { \ 232 static const struct file_operations name##_fops = { \
246 .open = simple_open, \ 233 .open = simple_open, \
247 .read = debug_read_##name, \ 234 .read = debug_read_##name, \
248 .llseek = generic_file_llseek, \ 235 .llseek = generic_file_llseek, \
249 } 236 }
250 237
251DEBUG_FOPS_RO(regs); 238DEBUG_FOPS_RO(regs);
252DEBUG_SEQ_FOPS_RO(tlb); 239DEFINE_SHOW_ATTRIBUTE(tlb);
253DEBUG_SEQ_FOPS_RO(pagetable); 240DEFINE_SHOW_ATTRIBUTE(pagetable);
254 241
255#define __DEBUG_ADD_FILE(attr, mode) \ 242#define __DEBUG_ADD_FILE(attr, mode) \
256 { \ 243 { \
257 struct dentry *dent; \ 244 struct dentry *dent; \
258 dent = debugfs_create_file(#attr, mode, obj->debug_dir, \ 245 dent = debugfs_create_file(#attr, mode, obj->debug_dir, \
259 obj, &debug_##attr##_fops); \ 246 obj, &attr##_fops); \
260 if (!dent) \ 247 if (!dent) \
261 goto err; \ 248 goto err; \
262 } 249 }
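DEFINE_SHOW_ATTRIBUTE() from <linux/seq_file.h> is what makes the local DEBUG_SEQ_FOPS_RO() helper and the debug_read_* naming unnecessary. Roughly, DEFINE_SHOW_ATTRIBUTE(tlb) expands to the following, given the tlb_show() function introduced above (paraphrased from the generic macro, not from this patch):

static int tlb_open(struct inode *inode, struct file *file)
{
	return single_open(file, tlb_show, inode->i_private);
}

static const struct file_operations tlb_fops = {
	.owner   = THIS_MODULE,
	.open    = tlb_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

This is also why __DEBUG_ADD_FILE() now looks up &attr##_fops instead of &debug_##attr##_fops.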
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index ee70e9921cf1..d8595f0a987d 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -29,7 +29,7 @@
29#include <linux/iommu.h> 29#include <linux/iommu.h>
30#include <linux/iopoll.h> 30#include <linux/iopoll.h>
31#include <linux/kconfig.h> 31#include <linux/kconfig.h>
32#include <linux/module.h> 32#include <linux/init.h>
33#include <linux/mutex.h> 33#include <linux/mutex.h>
34#include <linux/of.h> 34#include <linux/of.h>
35#include <linux/of_address.h> 35#include <linux/of_address.h>
@@ -354,7 +354,8 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain)
354 354
355static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) 355static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
356{ 356{
357 struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec); 357 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
358 struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec);
358 struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); 359 struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
359 int ret; 360 int ret;
360 361
@@ -365,7 +366,7 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev
365 366
366 /* Ensure that the domain is finalized */ 367 /* Ensure that the domain is finalized */
367 pm_runtime_get_sync(qcom_iommu->dev); 368 pm_runtime_get_sync(qcom_iommu->dev);
368 ret = qcom_iommu_init_domain(domain, qcom_iommu, dev->iommu_fwspec); 369 ret = qcom_iommu_init_domain(domain, qcom_iommu, fwspec);
369 pm_runtime_put_sync(qcom_iommu->dev); 370 pm_runtime_put_sync(qcom_iommu->dev);
370 if (ret < 0) 371 if (ret < 0)
371 return ret; 372 return ret;
@@ -387,7 +388,7 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev
387 388
388static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *dev) 389static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *dev)
389{ 390{
390 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 391 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
391 struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec); 392 struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec);
392 struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); 393 struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
393 unsigned i; 394 unsigned i;
@@ -500,7 +501,7 @@ static bool qcom_iommu_capable(enum iommu_cap cap)
500 501
501static int qcom_iommu_add_device(struct device *dev) 502static int qcom_iommu_add_device(struct device *dev)
502{ 503{
503 struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec); 504 struct qcom_iommu_dev *qcom_iommu = to_iommu(dev_iommu_fwspec_get(dev));
504 struct iommu_group *group; 505 struct iommu_group *group;
505 struct device_link *link; 506 struct device_link *link;
506 507
@@ -531,7 +532,7 @@ static int qcom_iommu_add_device(struct device *dev)
531 532
532static void qcom_iommu_remove_device(struct device *dev) 533static void qcom_iommu_remove_device(struct device *dev)
533{ 534{
534 struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec); 535 struct qcom_iommu_dev *qcom_iommu = to_iommu(dev_iommu_fwspec_get(dev));
535 536
536 if (!qcom_iommu) 537 if (!qcom_iommu)
537 return; 538 return;
@@ -543,6 +544,7 @@ static void qcom_iommu_remove_device(struct device *dev)
543 544
544static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) 545static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
545{ 546{
547 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
546 struct qcom_iommu_dev *qcom_iommu; 548 struct qcom_iommu_dev *qcom_iommu;
547 struct platform_device *iommu_pdev; 549 struct platform_device *iommu_pdev;
548 unsigned asid = args->args[0]; 550 unsigned asid = args->args[0];
@@ -568,14 +570,14 @@ static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
568 WARN_ON(asid > qcom_iommu->num_ctxs)) 570 WARN_ON(asid > qcom_iommu->num_ctxs))
569 return -EINVAL; 571 return -EINVAL;
570 572
571 if (!dev->iommu_fwspec->iommu_priv) { 573 if (!fwspec->iommu_priv) {
572 dev->iommu_fwspec->iommu_priv = qcom_iommu; 574 fwspec->iommu_priv = qcom_iommu;
573 } else { 575 } else {
574	 /* make sure the device's iommus DT node isn't referring to 576	 /* make sure the device's iommus DT node isn't referring to
575 * multiple different iommu devices. Multiple context 577 * multiple different iommu devices. Multiple context
576 * banks are ok, but multiple devices are not: 578 * banks are ok, but multiple devices are not:
577 */ 579 */
578 if (WARN_ON(qcom_iommu != dev->iommu_fwspec->iommu_priv)) 580 if (WARN_ON(qcom_iommu != fwspec->iommu_priv))
579 return -EINVAL; 581 return -EINVAL;
580 } 582 }
581 583
@@ -908,7 +910,6 @@ static const struct of_device_id qcom_iommu_of_match[] = {
908 { .compatible = "qcom,msm-iommu-v1" }, 910 { .compatible = "qcom,msm-iommu-v1" },
909 { /* sentinel */ } 911 { /* sentinel */ }
910}; 912};
911MODULE_DEVICE_TABLE(of, qcom_iommu_of_match);
912 913
913static struct platform_driver qcom_iommu_driver = { 914static struct platform_driver qcom_iommu_driver = {
914 .driver = { 915 .driver = {
@@ -934,15 +935,4 @@ static int __init qcom_iommu_init(void)
934 935
935 return ret; 936 return ret;
936} 937}
937 938device_initcall(qcom_iommu_init);
938static void __exit qcom_iommu_exit(void)
939{
940 platform_driver_unregister(&qcom_iommu_driver);
941 platform_driver_unregister(&qcom_iommu_ctx_driver);
942}
943
944module_init(qcom_iommu_init);
945module_exit(qcom_iommu_exit);
946
947MODULE_DESCRIPTION("IOMMU API for QCOM IOMMU v1 implementations");
948MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index ad3e2b97469e..c9ba9f377f63 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1,4 +1,9 @@
1/* 1/*
2 * IOMMU API for Rockchip
3 *
4 * Module Authors: Simon Xue <xxm@rock-chips.com>
5 * Daniel Kurtz <djkurtz@chromium.org>
6 *
2 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
4 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
@@ -17,7 +22,7 @@
17#include <linux/iopoll.h> 22#include <linux/iopoll.h>
18#include <linux/list.h> 23#include <linux/list.h>
19#include <linux/mm.h> 24#include <linux/mm.h>
20#include <linux/module.h> 25#include <linux/init.h>
21#include <linux/of.h> 26#include <linux/of.h>
22#include <linux/of_iommu.h> 27#include <linux/of_iommu.h>
23#include <linux/of_platform.h> 28#include <linux/of_platform.h>
@@ -1281,7 +1286,6 @@ static const struct of_device_id rk_iommu_dt_ids[] = {
1281 { .compatible = "rockchip,iommu" }, 1286 { .compatible = "rockchip,iommu" },
1282 { /* sentinel */ } 1287 { /* sentinel */ }
1283}; 1288};
1284MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids);
1285 1289
1286static struct platform_driver rk_iommu_driver = { 1290static struct platform_driver rk_iommu_driver = {
1287 .probe = rk_iommu_probe, 1291 .probe = rk_iommu_probe,
@@ -1299,8 +1303,3 @@ static int __init rk_iommu_init(void)
1299 return platform_driver_register(&rk_iommu_driver); 1303 return platform_driver_register(&rk_iommu_driver);
1300} 1304}
1301subsys_initcall(rk_iommu_init); 1305subsys_initcall(rk_iommu_init);
1302
1303MODULE_DESCRIPTION("IOMMU API for Rockchip");
1304MODULE_AUTHOR("Simon Xue <xxm@rock-chips.com> and Daniel Kurtz <djkurtz@chromium.org>");
1305MODULE_ALIAS("platform:rockchip-iommu");
1306MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index 7b1361d57a17..da6a4e357b2b 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -3,6 +3,8 @@
3 * 3 *
4 * Copyright (c) 2010-2012, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2010-2012, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * Author: Hiroshi DOYU <hdoyu@nvidia.com>
7 *
6 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation. 10 * version 2, as published by the Free Software Foundation.
@@ -19,7 +21,8 @@
19 21
20#define pr_fmt(fmt) "%s(): " fmt, __func__ 22#define pr_fmt(fmt) "%s(): " fmt, __func__
21 23
22#include <linux/module.h> 24#include <linux/init.h>
25#include <linux/moduleparam.h>
23#include <linux/platform_device.h> 26#include <linux/platform_device.h>
24#include <linux/spinlock.h> 27#include <linux/spinlock.h>
25#include <linux/slab.h> 28#include <linux/slab.h>
@@ -478,20 +481,6 @@ static int tegra_gart_probe(struct platform_device *pdev)
478 return 0; 481 return 0;
479} 482}
480 483
481static int tegra_gart_remove(struct platform_device *pdev)
482{
483 struct gart_device *gart = platform_get_drvdata(pdev);
484
485 iommu_device_unregister(&gart->iommu);
486 iommu_device_sysfs_remove(&gart->iommu);
487
488 writel(0, gart->regs + GART_CONFIG);
489 if (gart->savedata)
490 vfree(gart->savedata);
491 gart_handle = NULL;
492 return 0;
493}
494
495static const struct dev_pm_ops tegra_gart_pm_ops = { 484static const struct dev_pm_ops tegra_gart_pm_ops = {
496 .suspend = tegra_gart_suspend, 485 .suspend = tegra_gart_suspend,
497 .resume = tegra_gart_resume, 486 .resume = tegra_gart_resume,
@@ -501,34 +490,22 @@ static const struct of_device_id tegra_gart_of_match[] = {
501 { .compatible = "nvidia,tegra20-gart", }, 490 { .compatible = "nvidia,tegra20-gart", },
502 { }, 491 { },
503}; 492};
504MODULE_DEVICE_TABLE(of, tegra_gart_of_match);
505 493
506static struct platform_driver tegra_gart_driver = { 494static struct platform_driver tegra_gart_driver = {
507 .probe = tegra_gart_probe, 495 .probe = tegra_gart_probe,
508 .remove = tegra_gart_remove,
509 .driver = { 496 .driver = {
510 .name = "tegra-gart", 497 .name = "tegra-gart",
511 .pm = &tegra_gart_pm_ops, 498 .pm = &tegra_gart_pm_ops,
512 .of_match_table = tegra_gart_of_match, 499 .of_match_table = tegra_gart_of_match,
500 .suppress_bind_attrs = true,
513 }, 501 },
514}; 502};
515 503
516static int tegra_gart_init(void) 504static int __init tegra_gart_init(void)
517{ 505{
518 return platform_driver_register(&tegra_gart_driver); 506 return platform_driver_register(&tegra_gart_driver);
519} 507}
520
521static void __exit tegra_gart_exit(void)
522{
523 platform_driver_unregister(&tegra_gart_driver);
524}
525
526subsys_initcall(tegra_gart_init); 508subsys_initcall(tegra_gart_init);
527module_exit(tegra_gart_exit);
528module_param(gart_debug, bool, 0644);
529 509
510module_param(gart_debug, bool, 0644);
530MODULE_PARM_DESC(gart_debug, "Enable GART debugging"); 511MODULE_PARM_DESC(gart_debug, "Enable GART debugging");
531MODULE_DESCRIPTION("IOMMU API for GART in Tegra20");
532MODULE_AUTHOR("Hiroshi DOYU <hdoyu@nvidia.com>");
533MODULE_ALIAS("platform:tegra-gart");
534MODULE_LICENSE("GPL v2");
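Dropping tegra_gart_remove() is only safe if nothing can unbind the device behind the driver's back, hence the new suppress_bind_attrs flag. The general shape, with placeholder names (the real registration stays on subsys_initcall as shown above):

static int example_probe(struct platform_device *pdev)
{
	return 0;	/* placeholder probe */
}

static struct platform_driver example_driver = {
	.probe = example_probe,
	/* no .remove: the device must never be unbound */
	.driver = {
		.name = "example",
		.suppress_bind_attrs = true,	/* hide the sysfs bind/unbind knobs */
	},
};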
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 0d03341317c4..3a5c7dc6dc57 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -846,7 +846,7 @@ static struct iommu_group *tegra_smmu_group_get(struct tegra_smmu *smmu,
846 846
847static struct iommu_group *tegra_smmu_device_group(struct device *dev) 847static struct iommu_group *tegra_smmu_device_group(struct device *dev)
848{ 848{
849 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 849 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
850 struct tegra_smmu *smmu = dev->archdata.iommu; 850 struct tegra_smmu *smmu = dev->archdata.iommu;
851 struct iommu_group *group; 851 struct iommu_group *group;
852 852
@@ -926,17 +926,7 @@ static int tegra_smmu_swgroups_show(struct seq_file *s, void *data)
926 return 0; 926 return 0;
927} 927}
928 928
929static int tegra_smmu_swgroups_open(struct inode *inode, struct file *file) 929DEFINE_SHOW_ATTRIBUTE(tegra_smmu_swgroups);
930{
931 return single_open(file, tegra_smmu_swgroups_show, inode->i_private);
932}
933
934static const struct file_operations tegra_smmu_swgroups_fops = {
935 .open = tegra_smmu_swgroups_open,
936 .read = seq_read,
937 .llseek = seq_lseek,
938 .release = single_release,
939};
940 930
941static int tegra_smmu_clients_show(struct seq_file *s, void *data) 931static int tegra_smmu_clients_show(struct seq_file *s, void *data)
942{ 932{
@@ -964,17 +954,7 @@ static int tegra_smmu_clients_show(struct seq_file *s, void *data)
964 return 0; 954 return 0;
965} 955}
966 956
967static int tegra_smmu_clients_open(struct inode *inode, struct file *file) 957DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients);
968{
969 return single_open(file, tegra_smmu_clients_show, inode->i_private);
970}
971
972static const struct file_operations tegra_smmu_clients_fops = {
973 .open = tegra_smmu_clients_open,
974 .read = seq_read,
975 .llseek = seq_lseek,
976 .release = single_release,
977};
978 958
979static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu) 959static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu)
980{ 960{
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
index 0e4193cb08cf..749321eb91ae 100644
--- a/drivers/misc/mic/scif/scif_rma.c
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -15,7 +15,7 @@
15 * Intel SCIF driver. 15 * Intel SCIF driver.
16 * 16 *
17 */ 17 */
18#include <linux/dma_remapping.h> 18#include <linux/intel-iommu.h>
19#include <linux/pagemap.h> 19#include <linux/pagemap.h>
20#include <linux/sched/mm.h> 20#include <linux/sched/mm.h>
21#include <linux/sched/signal.h> 21#include <linux/sched/signal.h>
diff --git a/drivers/misc/mic/scif/scif_rma.h b/drivers/misc/mic/scif/scif_rma.h
index 84af3033a473..964dd0fc3657 100644
--- a/drivers/misc/mic/scif/scif_rma.h
+++ b/drivers/misc/mic/scif/scif_rma.h
@@ -53,7 +53,7 @@
53#ifndef SCIF_RMA_H 53#ifndef SCIF_RMA_H
54#define SCIF_RMA_H 54#define SCIF_RMA_H
55 55
56#include <linux/dma_remapping.h> 56#include <linux/intel-iommu.h>
57#include <linux/mmu_notifier.h> 57#include <linux/mmu_notifier.h>
58 58
59#include "../bus/scif_bus.h" 59#include "../bus/scif_bus.h"
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 46ab9c041091..005e65922608 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -245,7 +245,7 @@ static void xhci_zero_64b_regs(struct xhci_hcd *xhci)
245 * an iommu. Doing anything when there is no iommu is definitely 245 * an iommu. Doing anything when there is no iommu is definitely
246 * unsafe... 246 * unsafe...
247 */ 247 */
248 if (!(xhci->quirks & XHCI_ZERO_64B_REGS) || !dev->iommu_group) 248 if (!(xhci->quirks & XHCI_ZERO_64B_REGS) || !device_iommu_mapped(dev))
249 return; 249 return;
250 250
251 xhci_info(xhci, "Zeroing 64bit base registers, expecting fault\n"); 251 xhci_info(xhci, "Zeroing 64bit base registers, expecting fault\n");
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index d9fd3188615d..7651cfb14836 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -978,32 +978,6 @@ unlock:
978 return ret; 978 return ret;
979} 979}
980 980
981/*
982 * Turns out AMD IOMMU has a page table bug where it won't map large pages
983 * to a region that previously mapped smaller pages. This should be fixed
984 * soon, so this is just a temporary workaround to break mappings down into
985 * PAGE_SIZE. Better to map smaller pages than nothing.
986 */
987static int map_try_harder(struct vfio_domain *domain, dma_addr_t iova,
988 unsigned long pfn, long npage, int prot)
989{
990 long i;
991 int ret = 0;
992
993 for (i = 0; i < npage; i++, pfn++, iova += PAGE_SIZE) {
994 ret = iommu_map(domain->domain, iova,
995 (phys_addr_t)pfn << PAGE_SHIFT,
996 PAGE_SIZE, prot | domain->prot);
997 if (ret)
998 break;
999 }
1000
1001 for (; i < npage && i > 0; i--, iova -= PAGE_SIZE)
1002 iommu_unmap(domain->domain, iova, PAGE_SIZE);
1003
1004 return ret;
1005}
1006
1007static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, 981static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
1008 unsigned long pfn, long npage, int prot) 982 unsigned long pfn, long npage, int prot)
1009{ 983{
@@ -1013,11 +987,8 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
1013 list_for_each_entry(d, &iommu->domain_list, next) { 987 list_for_each_entry(d, &iommu->domain_list, next) {
1014 ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT, 988 ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
1015 npage << PAGE_SHIFT, prot | d->prot); 989 npage << PAGE_SHIFT, prot | d->prot);
1016 if (ret) { 990 if (ret)
1017 if (ret != -EBUSY || 991 goto unwind;
1018 map_try_harder(d, iova, pfn, npage, prot))
1019 goto unwind;
1020 }
1021 992
1022 cond_resched(); 993 cond_resched();
1023 } 994 }
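With the AMD large-page erratum long fixed, the -EBUSY retry path is gone and vfio_iommu_map() reduces to a plain map-all-domains-or-unwind loop. A condensed view of the resulting function; the unwind label belongs to the pre-existing code and is reconstructed here from context rather than shown in the hunk above:

static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
			  unsigned long pfn, long npage, int prot)
{
	struct vfio_domain *d;
	int ret;

	list_for_each_entry(d, &iommu->domain_list, next) {
		ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
				npage << PAGE_SHIFT, prot | d->prot);
		if (ret)
			goto unwind;

		cond_resched();
	}

	return 0;

unwind:
	/* undo the mappings already established in earlier domains */
	list_for_each_entry_continue_reverse(d, &iommu->domain_list, next)
		iommu_unmap(d->domain, iova, npage << PAGE_SHIFT);

	return ret;
}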
diff --git a/include/linux/device.h b/include/linux/device.h
index 1b25c7a43f4c..6cb4640b6160 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1058,6 +1058,16 @@ static inline struct device *kobj_to_dev(struct kobject *kobj)
1058 return container_of(kobj, struct device, kobj); 1058 return container_of(kobj, struct device, kobj);
1059} 1059}
1060 1060
1061/**
1062 * device_iommu_mapped - Returns true when the device DMA is translated
1063 * by an IOMMU
1064 * @dev: Device to perform the check on
1065 */
1066static inline bool device_iommu_mapped(struct device *dev)
1067{
1068 return (dev->iommu_group != NULL);
1069}
1070
1061/* Get the wakeup routines, which depend on struct device */ 1071/* Get the wakeup routines, which depend on struct device */
1062#include <linux/pm_wakeup.h> 1072#include <linux/pm_wakeup.h>
1063 1073
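device_iommu_mapped() is what lets callers such as the xhci quirk above stop poking at dev->iommu_group directly. A trivial usage sketch (hypothetical caller, not from this series):

static void example_report_dma(struct device *dev)
{
	if (device_iommu_mapped(dev))
		dev_dbg(dev, "DMA is translated by an IOMMU\n");
	else
		dev_dbg(dev, "DMA is direct\n");
}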
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
deleted file mode 100644
index 21b3e7d33d68..000000000000
--- a/include/linux/dma_remapping.h
+++ /dev/null
@@ -1,58 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _DMA_REMAPPING_H
3#define _DMA_REMAPPING_H
4
5/*
6 * VT-d hardware uses 4KiB page size regardless of host page size.
7 */
8#define VTD_PAGE_SHIFT (12)
9#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT)
10#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
11#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
12
13#define VTD_STRIDE_SHIFT (9)
14#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
15
16#define DMA_PTE_READ (1)
17#define DMA_PTE_WRITE (2)
18#define DMA_PTE_LARGE_PAGE (1 << 7)
19#define DMA_PTE_SNP (1 << 11)
20
21#define CONTEXT_TT_MULTI_LEVEL 0
22#define CONTEXT_TT_DEV_IOTLB 1
23#define CONTEXT_TT_PASS_THROUGH 2
24/* Extended context entry types */
25#define CONTEXT_TT_PT_PASID 4
26#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5
27#define CONTEXT_TT_MASK (7ULL << 2)
28
29#define CONTEXT_DINVE (1ULL << 8)
30#define CONTEXT_PRS (1ULL << 9)
31#define CONTEXT_PASIDE (1ULL << 11)
32
33struct intel_iommu;
34struct dmar_domain;
35struct root_entry;
36
37
38#ifdef CONFIG_INTEL_IOMMU
39extern int iommu_calculate_agaw(struct intel_iommu *iommu);
40extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
41extern int dmar_disabled;
42extern int intel_iommu_enabled;
43extern int intel_iommu_tboot_noforce;
44#else
45static inline int iommu_calculate_agaw(struct intel_iommu *iommu)
46{
47 return 0;
48}
49static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
50{
51 return 0;
52}
53#define dmar_disabled (1)
54#define intel_iommu_enabled (0)
55#endif
56
57
58#endif
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index b0ae25837361..0605f3bf6e79 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -26,7 +26,6 @@
26#include <linux/iova.h> 26#include <linux/iova.h>
27#include <linux/io.h> 27#include <linux/io.h>
28#include <linux/idr.h> 28#include <linux/idr.h>
29#include <linux/dma_remapping.h>
30#include <linux/mmu_notifier.h> 29#include <linux/mmu_notifier.h>
31#include <linux/list.h> 30#include <linux/list.h>
32#include <linux/iommu.h> 31#include <linux/iommu.h>
@@ -37,9 +36,29 @@
37#include <asm/iommu.h> 36#include <asm/iommu.h>
38 37
39/* 38/*
40 * Intel IOMMU register specification per version 1.0 public spec. 39 * VT-d hardware uses 4KiB page size regardless of host page size.
41 */ 40 */
41#define VTD_PAGE_SHIFT (12)
42#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT)
43#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
44#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
45
46#define VTD_STRIDE_SHIFT (9)
47#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
48
49#define DMA_PTE_READ (1)
50#define DMA_PTE_WRITE (2)
51#define DMA_PTE_LARGE_PAGE (1 << 7)
52#define DMA_PTE_SNP (1 << 11)
42 53
54#define CONTEXT_TT_MULTI_LEVEL 0
55#define CONTEXT_TT_DEV_IOTLB 1
56#define CONTEXT_TT_PASS_THROUGH 2
57#define CONTEXT_PASIDE BIT_ULL(3)
58
59/*
60 * Intel IOMMU register specification per version 1.0 public spec.
61 */
43#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ 62#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */
44#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ 63#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */
45#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ 64#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */
@@ -151,6 +170,10 @@
151 * Extended Capability Register 170 * Extended Capability Register
152 */ 171 */
153 172
173#define ecap_smpwc(e) (((e) >> 48) & 0x1)
174#define ecap_flts(e) (((e) >> 47) & 0x1)
175#define ecap_slts(e) (((e) >> 46) & 0x1)
176#define ecap_smts(e) (((e) >> 43) & 0x1)
154#define ecap_dit(e) ((e >> 41) & 0x1) 177#define ecap_dit(e) ((e >> 41) & 0x1)
155#define ecap_pasid(e) ((e >> 40) & 0x1) 178#define ecap_pasid(e) ((e >> 40) & 0x1)
156#define ecap_pss(e) ((e >> 35) & 0x1f) 179#define ecap_pss(e) ((e >> 35) & 0x1f)
@@ -229,6 +252,7 @@
229 252
230/* DMA_RTADDR_REG */ 253/* DMA_RTADDR_REG */
231#define DMA_RTADDR_RTT (((u64)1) << 11) 254#define DMA_RTADDR_RTT (((u64)1) << 11)
255#define DMA_RTADDR_SMT (((u64)1) << 10)
232 256
233/* CCMD_REG */ 257/* CCMD_REG */
234#define DMA_CCMD_ICC (((u64)1) << 63) 258#define DMA_CCMD_ICC (((u64)1) << 63)
@@ -374,13 +398,18 @@ enum {
374#define QI_GRAN_NONG_PASID 2 398#define QI_GRAN_NONG_PASID 2
375#define QI_GRAN_PSI_PASID 3 399#define QI_GRAN_PSI_PASID 3
376 400
401#define qi_shift(iommu) (DMAR_IQ_SHIFT + !!ecap_smts((iommu)->ecap))
402
377struct qi_desc { 403struct qi_desc {
378 u64 low, high; 404 u64 qw0;
405 u64 qw1;
406 u64 qw2;
407 u64 qw3;
379}; 408};
380 409
381struct q_inval { 410struct q_inval {
382 raw_spinlock_t q_lock; 411 raw_spinlock_t q_lock;
383 struct qi_desc *desc; /* invalidation queue */ 412 void *desc; /* invalidation queue */
384 int *desc_status; /* desc status */ 413 int *desc_status; /* desc status */
385 int free_head; /* first free entry */ 414 int free_head; /* first free entry */
386 int free_tail; /* last free entry */ 415 int free_tail; /* last free entry */
@@ -512,15 +541,8 @@ struct intel_iommu {
512 struct iommu_flush flush; 541 struct iommu_flush flush;
513#endif 542#endif
514#ifdef CONFIG_INTEL_IOMMU_SVM 543#ifdef CONFIG_INTEL_IOMMU_SVM
515 /* These are large and need to be contiguous, so we allocate just
516 * one for now. We'll maybe want to rethink that if we truly give
517 * devices away to userspace processes (e.g. for DPDK) and don't
518 * want to trust that userspace will use *only* the PASID it was
519 * told to. But while it's all driver-arbitrated, we're fine. */
520 struct pasid_state_entry *pasid_state_table;
521 struct page_req_dsc *prq; 544 struct page_req_dsc *prq;
522 unsigned char prq_name[16]; /* Name for PRQ interrupt */ 545 unsigned char prq_name[16]; /* Name for PRQ interrupt */
523 u32 pasid_max;
524#endif 546#endif
525 struct q_inval *qi; /* Queued invalidation info */ 547 struct q_inval *qi; /* Queued invalidation info */
526 u32 *iommu_state; /* Store iommu states between suspend and resume.*/ 548 u32 *iommu_state; /* Store iommu states between suspend and resume.*/
@@ -563,6 +585,49 @@ static inline void __iommu_flush_cache(
563 clflush_cache_range(addr, size); 585 clflush_cache_range(addr, size);
564} 586}
565 587
588/*
589 * 0: readable
590 * 1: writable
591 * 2-6: reserved
592 * 7: super page
593 * 8-10: available
594 * 11: snoop behavior
595	 * 12-63: Host physical address
596 */
597struct dma_pte {
598 u64 val;
599};
600
601static inline void dma_clear_pte(struct dma_pte *pte)
602{
603 pte->val = 0;
604}
605
606static inline u64 dma_pte_addr(struct dma_pte *pte)
607{
608#ifdef CONFIG_64BIT
609 return pte->val & VTD_PAGE_MASK;
610#else
611 /* Must have a full atomic 64-bit read */
612 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
613#endif
614}
615
616static inline bool dma_pte_present(struct dma_pte *pte)
617{
618 return (pte->val & 3) != 0;
619}
620
621static inline bool dma_pte_superpage(struct dma_pte *pte)
622{
623 return (pte->val & DMA_PTE_LARGE_PAGE);
624}
625
626static inline int first_pte_in_page(struct dma_pte *pte)
627{
628 return !((unsigned long)pte & ~VTD_PAGE_MASK);
629}
630
566extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); 631extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
567extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); 632extern int dmar_find_matched_atsr_unit(struct pci_dev *dev);
568 633
@@ -587,10 +652,10 @@ void free_pgtable_page(void *vaddr);
587struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); 652struct intel_iommu *domain_get_iommu(struct dmar_domain *domain);
588int for_each_device_domain(int (*fn)(struct device_domain_info *info, 653int for_each_device_domain(int (*fn)(struct device_domain_info *info,
589 void *data), void *data); 654 void *data), void *data);
655void iommu_flush_write_buffer(struct intel_iommu *iommu);
590 656
591#ifdef CONFIG_INTEL_IOMMU_SVM 657#ifdef CONFIG_INTEL_IOMMU_SVM
592int intel_svm_init(struct intel_iommu *iommu); 658int intel_svm_init(struct intel_iommu *iommu);
593int intel_svm_exit(struct intel_iommu *iommu);
594extern int intel_svm_enable_prq(struct intel_iommu *iommu); 659extern int intel_svm_enable_prq(struct intel_iommu *iommu);
595extern int intel_svm_finish_prq(struct intel_iommu *iommu); 660extern int intel_svm_finish_prq(struct intel_iommu *iommu);
596 661
@@ -632,4 +697,23 @@ bool context_present(struct context_entry *context);
632struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, 697struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
633 u8 devfn, int alloc); 698 u8 devfn, int alloc);
634 699
700#ifdef CONFIG_INTEL_IOMMU
701extern int iommu_calculate_agaw(struct intel_iommu *iommu);
702extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
703extern int dmar_disabled;
704extern int intel_iommu_enabled;
705extern int intel_iommu_tboot_noforce;
706#else
707static inline int iommu_calculate_agaw(struct intel_iommu *iommu)
708{
709 return 0;
710}
711static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
712{
713 return 0;
714}
715#define dmar_disabled (1)
716#define intel_iommu_enabled (0)
717#endif
718
635#endif 719#endif
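The struct qi_desc and q_inval changes anticipate the 256-bit invalidation descriptors used in scalable mode: qi->desc becomes an untyped buffer and qi_shift() gives the per-entry stride. A sketch of how an entry would be addressed under that scheme (illustrative helper, not lifted from dmar.c):

static struct qi_desc *example_qi_entry(struct intel_iommu *iommu, int index)
{
	struct q_inval *qi = iommu->qi;

	/*
	 * 1 << qi_shift() is 16 bytes on legacy hardware and 32 bytes when
	 * ecap_smts() reports scalable mode (four quadwords per descriptor).
	 */
	return qi->desc + (index << qi_shift(iommu));
}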
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a1d28f42cb77..e90da6b6f3d1 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -168,8 +168,8 @@ struct iommu_resv_region {
168 * @map: map a physically contiguous memory region to an iommu domain 168 * @map: map a physically contiguous memory region to an iommu domain
169 * @unmap: unmap a physically contiguous memory region from an iommu domain 169 * @unmap: unmap a physically contiguous memory region from an iommu domain
170 * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain 170 * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain
171 * @tlb_range_add: Add a given iova range to the flush queue for this domain 171 * @iotlb_range_add: Add a given iova range to the flush queue for this domain
172 * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush 172 * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
173 * queue 173 * queue
174 * @iova_to_phys: translate iova to physical address 174 * @iova_to_phys: translate iova to physical address
175 * @add_device: add device to iommu grouping 175 * @add_device: add device to iommu grouping
@@ -398,6 +398,20 @@ void iommu_fwspec_free(struct device *dev);
398int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids); 398int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids);
399const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode); 399const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode);
400 400
401static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
402{
403 return dev->iommu_fwspec;
404}
405
406static inline void dev_iommu_fwspec_set(struct device *dev,
407 struct iommu_fwspec *fwspec)
408{
409 dev->iommu_fwspec = fwspec;
410}
411
412int iommu_probe_device(struct device *dev);
413void iommu_release_device(struct device *dev);
414
401#else /* CONFIG_IOMMU_API */ 415#else /* CONFIG_IOMMU_API */
402 416
403struct iommu_ops {}; 417struct iommu_ops {};