author     Linus Torvalds <torvalds@linux-foundation.org>   2015-11-13 20:05:32 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-11-13 20:05:32 -0500
commit     934f98d7e8123892bd9ca8ea08728ee0784e6597 (patch)
tree       1a73064970020d945ca5f2c31ce420021b99e94c
parent     f3996e6ac6e2bd739d8a82cc9acae0653c2d5dca (diff)
parent     222e684ca762e9288108fcf852eb5d08cbe10ae3 (diff)
Merge tag 'vfio-v4.4-rc1' of git://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:

 - Use kernel interfaces for VPD emulation (Alex Williamson)
 - Platform fix for releasing IRQs (Eric Auger)
 - Type1 IOMMU always advertises PAGE_SIZE support when smaller mapping sizes are available (Eric Auger)
 - Platform fixes for incorrectly using copies of structures rather than pointers to structures (James Morse)
 - Rework platform reset modules, fix leak, and add AMD xgbe reset module (Eric Auger)
 - Fix vfio_device_get_from_name() return value (Joerg Roedel)
 - No-IOMMU interface (Alex Williamson)
 - Fix potential out of bounds array access in PCI config handling (Dan Carpenter)

* tag 'vfio-v4.4-rc1' of git://github.com/awilliam/linux-vfio:
  vfio/pci: make an array larger
  vfio: Include No-IOMMU mode
  vfio: Fix bug in vfio_device_get_from_name()
  VFIO: platform: reset: AMD xgbe reset module
  vfio: platform: reset: calxedaxgmac: fix ioaddr leak
  vfio: platform: add dev_info on device reset
  vfio: platform: use list of registered reset function
  vfio: platform: add compat in vfio_platform_device
  vfio: platform: reset: calxedaxgmac: add reset function registration
  vfio: platform: introduce module_vfio_reset_handler macro
  vfio: platform: add capability to register a reset function
  vfio: platform: introduce vfio-platform-base module
  vfio/platform: store mapped memory in region, instead of an on-stack copy
  vfio/type1: handle case where IOMMU does not support PAGE_SIZE size
  VFIO: platform: clear IRQ_NOAUTOEN when de-assigning the IRQ
  vfio/pci: Use kernel VPD access functions
  vfio: Whitelist PCI bridges
-rw-r--r--  drivers/vfio/Kconfig                                       |  15
-rw-r--r--  drivers/vfio/pci/vfio_pci.c                                |   8
-rw-r--r--  drivers/vfio/pci/vfio_pci_config.c                         |  74
-rw-r--r--  drivers/vfio/platform/Makefile                             |   6
-rw-r--r--  drivers/vfio/platform/reset/Kconfig                        |   8
-rw-r--r--  drivers/vfio/platform/reset/Makefile                       |   2
-rw-r--r--  drivers/vfio/platform/reset/vfio_platform_amdxgbe.c        | 127
-rw-r--r--  drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c   |  19
-rw-r--r--  drivers/vfio/platform/vfio_amba.c                          |   1
-rw-r--r--  drivers/vfio/platform/vfio_platform.c                      |   1
-rw-r--r--  drivers/vfio/platform/vfio_platform_common.c               | 155
-rw-r--r--  drivers/vfio/platform/vfio_platform_irq.c                  |   1
-rw-r--r--  drivers/vfio/platform/vfio_platform_private.h              |  40
-rw-r--r--  drivers/vfio/vfio.c                                        | 224
-rw-r--r--  drivers/vfio/vfio_iommu_type1.c                            |  15
-rw-r--r--  include/linux/vfio.h                                       |   3
-rw-r--r--  include/uapi/linux/vfio.h                                  |   7
17 files changed, 616 insertions, 90 deletions
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 850d86ca685b..da6e2ce77495 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -31,6 +31,21 @@ menuconfig VFIO
31 31
32 If you don't know what to do here, say N. 32 If you don't know what to do here, say N.
33 33
34menuconfig VFIO_NOIOMMU
35 bool "VFIO No-IOMMU support"
36 depends on VFIO
37 help
38 VFIO is built on the ability to isolate devices using the IOMMU.
39 Only with an IOMMU can userspace access to DMA capable devices be
40 considered secure. VFIO No-IOMMU mode enables IOMMU groups for
41 devices without IOMMU backing for the purpose of re-using the VFIO
42 infrastructure in a non-secure mode. Use of this mode will result
43 in an unsupportable kernel and will therefore taint the kernel.
44 Device assignment to virtual machines is also not possible with
45 this mode since there is no IOMMU to provide DMA translation.
46
47 If you don't know what to do here, say N.
48
34source "drivers/vfio/pci/Kconfig" 49source "drivers/vfio/pci/Kconfig"
35source "drivers/vfio/platform/Kconfig" 50source "drivers/vfio/platform/Kconfig"
36source "virt/lib/Kconfig" 51source "virt/lib/Kconfig"
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 964ad572aaee..32b88bd2c82c 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -940,13 +940,13 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
940 if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) 940 if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
941 return -EINVAL; 941 return -EINVAL;
942 942
943 group = iommu_group_get(&pdev->dev); 943 group = vfio_iommu_group_get(&pdev->dev);
944 if (!group) 944 if (!group)
945 return -EINVAL; 945 return -EINVAL;
946 946
947 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); 947 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
948 if (!vdev) { 948 if (!vdev) {
949 iommu_group_put(group); 949 vfio_iommu_group_put(group, &pdev->dev);
950 return -ENOMEM; 950 return -ENOMEM;
951 } 951 }
952 952
@@ -957,7 +957,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
957 957
958 ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); 958 ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
959 if (ret) { 959 if (ret) {
960 iommu_group_put(group); 960 vfio_iommu_group_put(group, &pdev->dev);
961 kfree(vdev); 961 kfree(vdev);
962 return ret; 962 return ret;
963 } 963 }
@@ -993,7 +993,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
993 if (!vdev) 993 if (!vdev)
994 return; 994 return;
995 995
996 iommu_group_put(pdev->dev.iommu_group); 996 vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
997 kfree(vdev); 997 kfree(vdev);
998 998
999 if (vfio_pci_is_vga(pdev)) { 999 if (vfio_pci_is_vga(pdev)) {
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index ff75ca31a199..fe2b470d7ec6 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -46,7 +46,7 @@
46 * 0: Removed from the user visible capability list 46 * 0: Removed from the user visible capability list
47 * FF: Variable length 47 * FF: Variable length
48 */ 48 */
49static u8 pci_cap_length[] = { 49static const u8 pci_cap_length[PCI_CAP_ID_MAX + 1] = {
50 [PCI_CAP_ID_BASIC] = PCI_STD_HEADER_SIZEOF, /* pci config header */ 50 [PCI_CAP_ID_BASIC] = PCI_STD_HEADER_SIZEOF, /* pci config header */
51 [PCI_CAP_ID_PM] = PCI_PM_SIZEOF, 51 [PCI_CAP_ID_PM] = PCI_PM_SIZEOF,
52 [PCI_CAP_ID_AGP] = PCI_AGP_SIZEOF, 52 [PCI_CAP_ID_AGP] = PCI_AGP_SIZEOF,
@@ -74,7 +74,7 @@ static u8 pci_cap_length[] = {
74 * 0: Removed or masked from the user visible capabilty list 74 * 0: Removed or masked from the user visible capabilty list
75 * FF: Variable length 75 * FF: Variable length
76 */ 76 */
77static u16 pci_ext_cap_length[] = { 77static const u16 pci_ext_cap_length[PCI_EXT_CAP_ID_MAX + 1] = {
78 [PCI_EXT_CAP_ID_ERR] = PCI_ERR_ROOT_COMMAND, 78 [PCI_EXT_CAP_ID_ERR] = PCI_ERR_ROOT_COMMAND,
79 [PCI_EXT_CAP_ID_VC] = 0xFF, 79 [PCI_EXT_CAP_ID_VC] = 0xFF,
80 [PCI_EXT_CAP_ID_DSN] = PCI_EXT_CAP_DSN_SIZEOF, 80 [PCI_EXT_CAP_ID_DSN] = PCI_EXT_CAP_DSN_SIZEOF,
@@ -671,6 +671,73 @@ static int __init init_pci_cap_pm_perm(struct perm_bits *perm)
671 return 0; 671 return 0;
672} 672}
673 673
674static int vfio_vpd_config_write(struct vfio_pci_device *vdev, int pos,
675 int count, struct perm_bits *perm,
676 int offset, __le32 val)
677{
678 struct pci_dev *pdev = vdev->pdev;
679 __le16 *paddr = (__le16 *)(vdev->vconfig + pos - offset + PCI_VPD_ADDR);
680 __le32 *pdata = (__le32 *)(vdev->vconfig + pos - offset + PCI_VPD_DATA);
681 u16 addr;
682 u32 data;
683
684 /*
685 * Write through to emulation. If the write includes the upper byte
686 * of PCI_VPD_ADDR, then the PCI_VPD_ADDR_F bit is written and we
687 * have work to do.
688 */
689 count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
690 if (count < 0 || offset > PCI_VPD_ADDR + 1 ||
691 offset + count <= PCI_VPD_ADDR + 1)
692 return count;
693
694 addr = le16_to_cpu(*paddr);
695
696 if (addr & PCI_VPD_ADDR_F) {
697 data = le32_to_cpu(*pdata);
698 if (pci_write_vpd(pdev, addr & ~PCI_VPD_ADDR_F, 4, &data) != 4)
699 return count;
700 } else {
701 if (pci_read_vpd(pdev, addr, 4, &data) != 4)
702 return count;
703 *pdata = cpu_to_le32(data);
704 }
705
706 /*
707 * Toggle PCI_VPD_ADDR_F in the emulated PCI_VPD_ADDR register to
708 * signal completion. If an error occurs above, we assume that not
709 * toggling this bit will induce a driver timeout.
710 */
711 addr ^= PCI_VPD_ADDR_F;
712 *paddr = cpu_to_le16(addr);
713
714 return count;
715}
716
717/* Permissions for Vital Product Data capability */
718static int __init init_pci_cap_vpd_perm(struct perm_bits *perm)
719{
720 if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_VPD]))
721 return -ENOMEM;
722
723 perm->writefn = vfio_vpd_config_write;
724
725 /*
726 * We always virtualize the next field so we can remove
727 * capabilities from the chain if we want to.
728 */
729 p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
730
731 /*
732 * Both the address and data registers are virtualized to
733 * enable access through the pci_vpd_read/write functions
734 */
735 p_setw(perm, PCI_VPD_ADDR, (u16)ALL_VIRT, (u16)ALL_WRITE);
736 p_setd(perm, PCI_VPD_DATA, ALL_VIRT, ALL_WRITE);
737
738 return 0;
739}
740
674/* Permissions for PCI-X capability */ 741/* Permissions for PCI-X capability */
675static int __init init_pci_cap_pcix_perm(struct perm_bits *perm) 742static int __init init_pci_cap_pcix_perm(struct perm_bits *perm)
676{ 743{
@@ -790,6 +857,7 @@ void vfio_pci_uninit_perm_bits(void)
790 free_perm_bits(&cap_perms[PCI_CAP_ID_BASIC]); 857 free_perm_bits(&cap_perms[PCI_CAP_ID_BASIC]);
791 858
792 free_perm_bits(&cap_perms[PCI_CAP_ID_PM]); 859 free_perm_bits(&cap_perms[PCI_CAP_ID_PM]);
860 free_perm_bits(&cap_perms[PCI_CAP_ID_VPD]);
793 free_perm_bits(&cap_perms[PCI_CAP_ID_PCIX]); 861 free_perm_bits(&cap_perms[PCI_CAP_ID_PCIX]);
794 free_perm_bits(&cap_perms[PCI_CAP_ID_EXP]); 862 free_perm_bits(&cap_perms[PCI_CAP_ID_EXP]);
795 free_perm_bits(&cap_perms[PCI_CAP_ID_AF]); 863 free_perm_bits(&cap_perms[PCI_CAP_ID_AF]);
@@ -807,7 +875,7 @@ int __init vfio_pci_init_perm_bits(void)
807 875
808 /* Capabilities */ 876 /* Capabilities */
809 ret |= init_pci_cap_pm_perm(&cap_perms[PCI_CAP_ID_PM]); 877 ret |= init_pci_cap_pm_perm(&cap_perms[PCI_CAP_ID_PM]);
810 cap_perms[PCI_CAP_ID_VPD].writefn = vfio_raw_config_write; 878 ret |= init_pci_cap_vpd_perm(&cap_perms[PCI_CAP_ID_VPD]);
811 ret |= init_pci_cap_pcix_perm(&cap_perms[PCI_CAP_ID_PCIX]); 879 ret |= init_pci_cap_pcix_perm(&cap_perms[PCI_CAP_ID_PCIX]);
812 cap_perms[PCI_CAP_ID_VNDR].writefn = vfio_raw_config_write; 880 cap_perms[PCI_CAP_ID_VNDR].writefn = vfio_raw_config_write;
813 ret |= init_pci_cap_exp_perm(&cap_perms[PCI_CAP_ID_EXP]); 881 ret |= init_pci_cap_exp_perm(&cap_perms[PCI_CAP_ID_EXP]);
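The vfio_pci_config.c changes above virtualize the VPD address/data pair and back it with the kernel's pci_read_vpd()/pci_write_vpd() helpers instead of letting userspace drive the hardware registers directly. A hedged userspace sketch of a 4-byte VPD read through the emulated registers follows; "cfg_off" (the offset of the PCI config region within the device fd) and "vpd_cap" (the VPD capability offset in config space) are assumptions the caller would obtain elsewhere (VFIO_DEVICE_GET_REGION_INFO and a capability-list walk), not something this patch defines. A little-endian host is assumed for brevity.

#define _GNU_SOURCE
#include <stdint.h>
#include <unistd.h>
#include <sys/types.h>
#include <linux/pci_regs.h>

/* Sketch only: cfg_off and vpd_cap are placeholder inputs, see above. */
static int vpd_read32(int device_fd, off_t cfg_off, uint16_t vpd_cap,
                      uint16_t addr, uint32_t *val)
{
        uint16_t ctrl = addr & ~PCI_VPD_ADDR_F;   /* F=0 requests a read */

        /* Writing the full 16-bit address covers the upper byte of
         * PCI_VPD_ADDR, which is what triggers the emulation above. */
        if (pwrite(device_fd, &ctrl, 2, cfg_off + vpd_cap + PCI_VPD_ADDR) != 2)
                return -1;

        /* vfio-pci completes the access synchronously and toggles
         * PCI_VPD_ADDR_F, so this loop exits on the first pass; it is
         * kept because it mirrors the sequence real drivers use. */
        do {
                if (pread(device_fd, &ctrl, 2, cfg_off + vpd_cap + PCI_VPD_ADDR) != 2)
                        return -1;
        } while (!(ctrl & PCI_VPD_ADDR_F));

        return pread(device_fd, val, 4, cfg_off + vpd_cap + PCI_VPD_DATA) == 4 ? 0 : -1;
}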
diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile
index 9ce8afe28450..41a6224f5e6b 100644
--- a/drivers/vfio/platform/Makefile
+++ b/drivers/vfio/platform/Makefile
@@ -1,10 +1,12 @@
1 1vfio-platform-base-y := vfio_platform_common.o vfio_platform_irq.o
2vfio-platform-y := vfio_platform.o vfio_platform_common.o vfio_platform_irq.o 2vfio-platform-y := vfio_platform.o
3 3
4obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o 4obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o
5obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform-base.o
5obj-$(CONFIG_VFIO_PLATFORM) += reset/ 6obj-$(CONFIG_VFIO_PLATFORM) += reset/
6 7
7vfio-amba-y := vfio_amba.o 8vfio-amba-y := vfio_amba.o
8 9
9obj-$(CONFIG_VFIO_AMBA) += vfio-amba.o 10obj-$(CONFIG_VFIO_AMBA) += vfio-amba.o
11obj-$(CONFIG_VFIO_AMBA) += vfio-platform-base.o
10obj-$(CONFIG_VFIO_AMBA) += reset/ 12obj-$(CONFIG_VFIO_AMBA) += reset/
diff --git a/drivers/vfio/platform/reset/Kconfig b/drivers/vfio/platform/reset/Kconfig
index 746b96b0003b..70cccc582bee 100644
--- a/drivers/vfio/platform/reset/Kconfig
+++ b/drivers/vfio/platform/reset/Kconfig
@@ -5,3 +5,11 @@ config VFIO_PLATFORM_CALXEDAXGMAC_RESET
5 Enables the VFIO platform driver to handle reset for Calxeda xgmac 5 Enables the VFIO platform driver to handle reset for Calxeda xgmac
6 6
7 If you don't know what to do here, say N. 7 If you don't know what to do here, say N.
8
9config VFIO_PLATFORM_AMDXGBE_RESET
10 tristate "VFIO support for AMD XGBE reset"
11 depends on VFIO_PLATFORM
12 help
13 Enables the VFIO platform driver to handle reset for AMD XGBE
14
15 If you don't know what to do here, say N.
diff --git a/drivers/vfio/platform/reset/Makefile b/drivers/vfio/platform/reset/Makefile
index 2a486af9f8fa..93f4e232697b 100644
--- a/drivers/vfio/platform/reset/Makefile
+++ b/drivers/vfio/platform/reset/Makefile
@@ -1,5 +1,7 @@
1vfio-platform-calxedaxgmac-y := vfio_platform_calxedaxgmac.o 1vfio-platform-calxedaxgmac-y := vfio_platform_calxedaxgmac.o
2vfio-platform-amdxgbe-y := vfio_platform_amdxgbe.o
2 3
3ccflags-y += -Idrivers/vfio/platform 4ccflags-y += -Idrivers/vfio/platform
4 5
5obj-$(CONFIG_VFIO_PLATFORM_CALXEDAXGMAC_RESET) += vfio-platform-calxedaxgmac.o 6obj-$(CONFIG_VFIO_PLATFORM_CALXEDAXGMAC_RESET) += vfio-platform-calxedaxgmac.o
7obj-$(CONFIG_VFIO_PLATFORM_AMDXGBE_RESET) += vfio-platform-amdxgbe.o
diff --git a/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c b/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c
new file mode 100644
index 000000000000..da5356f48d0b
--- /dev/null
+++ b/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c
@@ -0,0 +1,127 @@
1/*
2 * VFIO platform driver specialized for AMD xgbe reset
3 * reset code is inherited from AMD xgbe native driver
4 *
5 * Copyright (c) 2015 Linaro Ltd.
6 * www.linaro.org
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#include <linux/module.h>
22#include <linux/kernel.h>
23#include <linux/init.h>
24#include <linux/io.h>
25#include <uapi/linux/mdio.h>
26#include <linux/delay.h>
27
28#include "vfio_platform_private.h"
29
30#define DMA_MR 0x3000
31#define MAC_VR 0x0110
32#define DMA_ISR 0x3008
33#define MAC_ISR 0x00b0
34#define PCS_MMD_SELECT 0xff
35#define MDIO_AN_INT 0x8002
36#define MDIO_AN_INTMASK 0x8001
37
38static unsigned int xmdio_read(void *ioaddr, unsigned int mmd,
39 unsigned int reg)
40{
41 unsigned int mmd_address, value;
42
43 mmd_address = (mmd << 16) | ((reg) & 0xffff);
44 iowrite32(mmd_address >> 8, ioaddr + (PCS_MMD_SELECT << 2));
45 value = ioread32(ioaddr + ((mmd_address & 0xff) << 2));
46 return value;
47}
48
49static void xmdio_write(void *ioaddr, unsigned int mmd,
50 unsigned int reg, unsigned int value)
51{
52 unsigned int mmd_address;
53
54 mmd_address = (mmd << 16) | ((reg) & 0xffff);
55 iowrite32(mmd_address >> 8, ioaddr + (PCS_MMD_SELECT << 2));
56 iowrite32(value, ioaddr + ((mmd_address & 0xff) << 2));
57}
58
59int vfio_platform_amdxgbe_reset(struct vfio_platform_device *vdev)
60{
61 struct vfio_platform_region *xgmac_regs = &vdev->regions[0];
62 struct vfio_platform_region *xpcs_regs = &vdev->regions[1];
63 u32 dma_mr_value, pcs_value, value;
64 unsigned int count;
65
66 if (!xgmac_regs->ioaddr) {
67 xgmac_regs->ioaddr =
68 ioremap_nocache(xgmac_regs->addr, xgmac_regs->size);
69 if (!xgmac_regs->ioaddr)
70 return -ENOMEM;
71 }
72 if (!xpcs_regs->ioaddr) {
73 xpcs_regs->ioaddr =
74 ioremap_nocache(xpcs_regs->addr, xpcs_regs->size);
75 if (!xpcs_regs->ioaddr)
76 return -ENOMEM;
77 }
78
79 /* reset the PHY through MDIO*/
80 pcs_value = xmdio_read(xpcs_regs->ioaddr, MDIO_MMD_PCS, MDIO_CTRL1);
81 pcs_value |= MDIO_CTRL1_RESET;
82 xmdio_write(xpcs_regs->ioaddr, MDIO_MMD_PCS, MDIO_CTRL1, pcs_value);
83
84 count = 50;
85 do {
86 msleep(20);
87 pcs_value = xmdio_read(xpcs_regs->ioaddr, MDIO_MMD_PCS,
88 MDIO_CTRL1);
89 } while ((pcs_value & MDIO_CTRL1_RESET) && --count);
90
91 if (pcs_value & MDIO_CTRL1_RESET)
92 pr_warn("%s XGBE PHY reset timeout\n", __func__);
93
94 /* disable auto-negotiation */
95 value = xmdio_read(xpcs_regs->ioaddr, MDIO_MMD_AN, MDIO_CTRL1);
96 value &= ~MDIO_AN_CTRL1_ENABLE;
97 xmdio_write(xpcs_regs->ioaddr, MDIO_MMD_AN, MDIO_CTRL1, value);
98
99 /* disable AN IRQ */
100 xmdio_write(xpcs_regs->ioaddr, MDIO_MMD_AN, MDIO_AN_INTMASK, 0);
101
102 /* clear AN IRQ */
103 xmdio_write(xpcs_regs->ioaddr, MDIO_MMD_AN, MDIO_AN_INT, 0);
104
105 /* MAC software reset */
106 dma_mr_value = ioread32(xgmac_regs->ioaddr + DMA_MR);
107 dma_mr_value |= 0x1;
108 iowrite32(dma_mr_value, xgmac_regs->ioaddr + DMA_MR);
109
110 usleep_range(10, 15);
111
112 count = 2000;
113 while (count-- && (ioread32(xgmac_regs->ioaddr + DMA_MR) & 1))
114 usleep_range(500, 600);
115
116 if (!count)
117 pr_warn("%s MAC SW reset failed\n", __func__);
118
119 return 0;
120}
121
122module_vfio_reset_handler("amd,xgbe-seattle-v1a", vfio_platform_amdxgbe_reset);
123
124MODULE_VERSION("0.1");
125MODULE_LICENSE("GPL v2");
126MODULE_AUTHOR("Eric Auger <eric.auger@linaro.org>");
127MODULE_DESCRIPTION("Reset support for AMD xgbe vfio platform device");
diff --git a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c
index 619dc7d22082..e3d3d948e661 100644
--- a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c
+++ b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c
@@ -30,8 +30,6 @@
30#define DRIVER_AUTHOR "Eric Auger <eric.auger@linaro.org>" 30#define DRIVER_AUTHOR "Eric Auger <eric.auger@linaro.org>"
31#define DRIVER_DESC "Reset support for Calxeda xgmac vfio platform device" 31#define DRIVER_DESC "Reset support for Calxeda xgmac vfio platform device"
32 32
33#define CALXEDAXGMAC_COMPAT "calxeda,hb-xgmac"
34
35/* XGMAC Register definitions */ 33/* XGMAC Register definitions */
36#define XGMAC_CONTROL 0x00000000 /* MAC Configuration */ 34#define XGMAC_CONTROL 0x00000000 /* MAC Configuration */
37 35
@@ -61,24 +59,25 @@ static inline void xgmac_mac_disable(void __iomem *ioaddr)
61 59
62int vfio_platform_calxedaxgmac_reset(struct vfio_platform_device *vdev) 60int vfio_platform_calxedaxgmac_reset(struct vfio_platform_device *vdev)
63{ 61{
64 struct vfio_platform_region reg = vdev->regions[0]; 62 struct vfio_platform_region *reg = &vdev->regions[0];
65 63
66 if (!reg.ioaddr) { 64 if (!reg->ioaddr) {
67 reg.ioaddr = 65 reg->ioaddr =
68 ioremap_nocache(reg.addr, reg.size); 66 ioremap_nocache(reg->addr, reg->size);
69 if (!reg.ioaddr) 67 if (!reg->ioaddr)
70 return -ENOMEM; 68 return -ENOMEM;
71 } 69 }
72 70
73 /* disable IRQ */ 71 /* disable IRQ */
74 writel(0, reg.ioaddr + XGMAC_DMA_INTR_ENA); 72 writel(0, reg->ioaddr + XGMAC_DMA_INTR_ENA);
75 73
76 /* Disable the MAC core */ 74 /* Disable the MAC core */
77 xgmac_mac_disable(reg.ioaddr); 75 xgmac_mac_disable(reg->ioaddr);
78 76
79 return 0; 77 return 0;
80} 78}
81EXPORT_SYMBOL_GPL(vfio_platform_calxedaxgmac_reset); 79
80module_vfio_reset_handler("calxeda,hb-xgmac", vfio_platform_calxedaxgmac_reset);
82 81
83MODULE_VERSION(DRIVER_VERSION); 82MODULE_VERSION(DRIVER_VERSION);
84MODULE_LICENSE("GPL v2"); 83MODULE_LICENSE("GPL v2");
diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c
index ff0331f72526..a66479bd0edf 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -67,6 +67,7 @@ static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id)
67 vdev->flags = VFIO_DEVICE_FLAGS_AMBA; 67 vdev->flags = VFIO_DEVICE_FLAGS_AMBA;
68 vdev->get_resource = get_amba_resource; 68 vdev->get_resource = get_amba_resource;
69 vdev->get_irq = get_amba_irq; 69 vdev->get_irq = get_amba_irq;
70 vdev->parent_module = THIS_MODULE;
70 71
71 ret = vfio_platform_probe_common(vdev, &adev->dev); 72 ret = vfio_platform_probe_common(vdev, &adev->dev);
72 if (ret) { 73 if (ret) {
diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c
index cef645c83996..f1625dcfbb23 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -65,6 +65,7 @@ static int vfio_platform_probe(struct platform_device *pdev)
65 vdev->flags = VFIO_DEVICE_FLAGS_PLATFORM; 65 vdev->flags = VFIO_DEVICE_FLAGS_PLATFORM;
66 vdev->get_resource = get_platform_resource; 66 vdev->get_resource = get_platform_resource;
67 vdev->get_irq = get_platform_irq; 67 vdev->get_irq = get_platform_irq;
68 vdev->parent_module = THIS_MODULE;
68 69
69 ret = vfio_platform_probe_common(vdev, &pdev->dev); 70 ret = vfio_platform_probe_common(vdev, &pdev->dev);
70 if (ret) 71 if (ret)
diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index e43efb5e92bf..a1c50d630792 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -23,44 +23,50 @@
23 23
24#include "vfio_platform_private.h" 24#include "vfio_platform_private.h"
25 25
26static DEFINE_MUTEX(driver_lock); 26#define DRIVER_VERSION "0.10"
27#define DRIVER_AUTHOR "Antonios Motakis <a.motakis@virtualopensystems.com>"
28#define DRIVER_DESC "VFIO platform base module"
27 29
28static const struct vfio_platform_reset_combo reset_lookup_table[] = { 30static LIST_HEAD(reset_list);
29 { 31static DEFINE_MUTEX(driver_lock);
30 .compat = "calxeda,hb-xgmac",
31 .reset_function_name = "vfio_platform_calxedaxgmac_reset",
32 .module_name = "vfio-platform-calxedaxgmac",
33 },
34};
35 32
36static void vfio_platform_get_reset(struct vfio_platform_device *vdev, 33static vfio_platform_reset_fn_t vfio_platform_lookup_reset(const char *compat,
37 struct device *dev) 34 struct module **module)
38{ 35{
39 const char *compat; 36 struct vfio_platform_reset_node *iter;
40 int (*reset)(struct vfio_platform_device *); 37 vfio_platform_reset_fn_t reset_fn = NULL;
41 int ret, i; 38
42 39 mutex_lock(&driver_lock);
43 ret = device_property_read_string(dev, "compatible", &compat); 40 list_for_each_entry(iter, &reset_list, link) {
44 if (ret) 41 if (!strcmp(iter->compat, compat) &&
45 return; 42 try_module_get(iter->owner)) {
46 43 *module = iter->owner;
47 for (i = 0 ; i < ARRAY_SIZE(reset_lookup_table); i++) { 44 reset_fn = iter->reset;
48 if (!strcmp(reset_lookup_table[i].compat, compat)) { 45 break;
49 request_module(reset_lookup_table[i].module_name);
50 reset = __symbol_get(
51 reset_lookup_table[i].reset_function_name);
52 if (reset) {
53 vdev->reset = reset;
54 return;
55 }
56 } 46 }
57 } 47 }
48 mutex_unlock(&driver_lock);
49 return reset_fn;
50}
51
52static void vfio_platform_get_reset(struct vfio_platform_device *vdev)
53{
54 char modname[256];
55
56 vdev->reset = vfio_platform_lookup_reset(vdev->compat,
57 &vdev->reset_module);
58 if (!vdev->reset) {
59 snprintf(modname, 256, "vfio-reset:%s", vdev->compat);
60 request_module(modname);
61 vdev->reset = vfio_platform_lookup_reset(vdev->compat,
62 &vdev->reset_module);
63 }
58} 64}
59 65
60static void vfio_platform_put_reset(struct vfio_platform_device *vdev) 66static void vfio_platform_put_reset(struct vfio_platform_device *vdev)
61{ 67{
62 if (vdev->reset) 68 if (vdev->reset)
63 symbol_put_addr(vdev->reset); 69 module_put(vdev->reset_module);
64} 70}
65 71
66static int vfio_platform_regions_init(struct vfio_platform_device *vdev) 72static int vfio_platform_regions_init(struct vfio_platform_device *vdev)
@@ -138,15 +144,19 @@ static void vfio_platform_release(void *device_data)
138 mutex_lock(&driver_lock); 144 mutex_lock(&driver_lock);
139 145
140 if (!(--vdev->refcnt)) { 146 if (!(--vdev->refcnt)) {
141 if (vdev->reset) 147 if (vdev->reset) {
148 dev_info(vdev->device, "reset\n");
142 vdev->reset(vdev); 149 vdev->reset(vdev);
150 } else {
151 dev_warn(vdev->device, "no reset function found!\n");
152 }
143 vfio_platform_regions_cleanup(vdev); 153 vfio_platform_regions_cleanup(vdev);
144 vfio_platform_irq_cleanup(vdev); 154 vfio_platform_irq_cleanup(vdev);
145 } 155 }
146 156
147 mutex_unlock(&driver_lock); 157 mutex_unlock(&driver_lock);
148 158
149 module_put(THIS_MODULE); 159 module_put(vdev->parent_module);
150} 160}
151 161
152static int vfio_platform_open(void *device_data) 162static int vfio_platform_open(void *device_data)
@@ -154,7 +164,7 @@ static int vfio_platform_open(void *device_data)
154 struct vfio_platform_device *vdev = device_data; 164 struct vfio_platform_device *vdev = device_data;
155 int ret; 165 int ret;
156 166
157 if (!try_module_get(THIS_MODULE)) 167 if (!try_module_get(vdev->parent_module))
158 return -ENODEV; 168 return -ENODEV;
159 169
160 mutex_lock(&driver_lock); 170 mutex_lock(&driver_lock);
@@ -168,8 +178,12 @@ static int vfio_platform_open(void *device_data)
168 if (ret) 178 if (ret)
169 goto err_irq; 179 goto err_irq;
170 180
171 if (vdev->reset) 181 if (vdev->reset) {
182 dev_info(vdev->device, "reset\n");
172 vdev->reset(vdev); 183 vdev->reset(vdev);
184 } else {
185 dev_warn(vdev->device, "no reset function found!\n");
186 }
173 } 187 }
174 188
175 vdev->refcnt++; 189 vdev->refcnt++;
@@ -307,17 +321,17 @@ static long vfio_platform_ioctl(void *device_data,
307 return -ENOTTY; 321 return -ENOTTY;
308} 322}
309 323
310static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg, 324static ssize_t vfio_platform_read_mmio(struct vfio_platform_region *reg,
311 char __user *buf, size_t count, 325 char __user *buf, size_t count,
312 loff_t off) 326 loff_t off)
313{ 327{
314 unsigned int done = 0; 328 unsigned int done = 0;
315 329
316 if (!reg.ioaddr) { 330 if (!reg->ioaddr) {
317 reg.ioaddr = 331 reg->ioaddr =
318 ioremap_nocache(reg.addr, reg.size); 332 ioremap_nocache(reg->addr, reg->size);
319 333
320 if (!reg.ioaddr) 334 if (!reg->ioaddr)
321 return -ENOMEM; 335 return -ENOMEM;
322 } 336 }
323 337
@@ -327,7 +341,7 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg,
327 if (count >= 4 && !(off % 4)) { 341 if (count >= 4 && !(off % 4)) {
328 u32 val; 342 u32 val;
329 343
330 val = ioread32(reg.ioaddr + off); 344 val = ioread32(reg->ioaddr + off);
331 if (copy_to_user(buf, &val, 4)) 345 if (copy_to_user(buf, &val, 4))
332 goto err; 346 goto err;
333 347
@@ -335,7 +349,7 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg,
335 } else if (count >= 2 && !(off % 2)) { 349 } else if (count >= 2 && !(off % 2)) {
336 u16 val; 350 u16 val;
337 351
338 val = ioread16(reg.ioaddr + off); 352 val = ioread16(reg->ioaddr + off);
339 if (copy_to_user(buf, &val, 2)) 353 if (copy_to_user(buf, &val, 2))
340 goto err; 354 goto err;
341 355
@@ -343,7 +357,7 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg,
343 } else { 357 } else {
344 u8 val; 358 u8 val;
345 359
346 val = ioread8(reg.ioaddr + off); 360 val = ioread8(reg->ioaddr + off);
347 if (copy_to_user(buf, &val, 1)) 361 if (copy_to_user(buf, &val, 1))
348 goto err; 362 goto err;
349 363
@@ -376,7 +390,7 @@ static ssize_t vfio_platform_read(void *device_data, char __user *buf,
376 return -EINVAL; 390 return -EINVAL;
377 391
378 if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_MMIO) 392 if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_MMIO)
379 return vfio_platform_read_mmio(vdev->regions[index], 393 return vfio_platform_read_mmio(&vdev->regions[index],
380 buf, count, off); 394 buf, count, off);
381 else if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_PIO) 395 else if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_PIO)
382 return -EINVAL; /* not implemented */ 396 return -EINVAL; /* not implemented */
@@ -384,17 +398,17 @@ static ssize_t vfio_platform_read(void *device_data, char __user *buf,
384 return -EINVAL; 398 return -EINVAL;
385} 399}
386 400
387static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg, 401static ssize_t vfio_platform_write_mmio(struct vfio_platform_region *reg,
388 const char __user *buf, size_t count, 402 const char __user *buf, size_t count,
389 loff_t off) 403 loff_t off)
390{ 404{
391 unsigned int done = 0; 405 unsigned int done = 0;
392 406
393 if (!reg.ioaddr) { 407 if (!reg->ioaddr) {
394 reg.ioaddr = 408 reg->ioaddr =
395 ioremap_nocache(reg.addr, reg.size); 409 ioremap_nocache(reg->addr, reg->size);
396 410
397 if (!reg.ioaddr) 411 if (!reg->ioaddr)
398 return -ENOMEM; 412 return -ENOMEM;
399 } 413 }
400 414
@@ -406,7 +420,7 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg,
406 420
407 if (copy_from_user(&val, buf, 4)) 421 if (copy_from_user(&val, buf, 4))
408 goto err; 422 goto err;
409 iowrite32(val, reg.ioaddr + off); 423 iowrite32(val, reg->ioaddr + off);
410 424
411 filled = 4; 425 filled = 4;
412 } else if (count >= 2 && !(off % 2)) { 426 } else if (count >= 2 && !(off % 2)) {
@@ -414,7 +428,7 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg,
414 428
415 if (copy_from_user(&val, buf, 2)) 429 if (copy_from_user(&val, buf, 2))
416 goto err; 430 goto err;
417 iowrite16(val, reg.ioaddr + off); 431 iowrite16(val, reg->ioaddr + off);
418 432
419 filled = 2; 433 filled = 2;
420 } else { 434 } else {
@@ -422,7 +436,7 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg,
422 436
423 if (copy_from_user(&val, buf, 1)) 437 if (copy_from_user(&val, buf, 1))
424 goto err; 438 goto err;
425 iowrite8(val, reg.ioaddr + off); 439 iowrite8(val, reg->ioaddr + off);
426 440
427 filled = 1; 441 filled = 1;
428 } 442 }
@@ -452,7 +466,7 @@ static ssize_t vfio_platform_write(void *device_data, const char __user *buf,
452 return -EINVAL; 466 return -EINVAL;
453 467
454 if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_MMIO) 468 if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_MMIO)
455 return vfio_platform_write_mmio(vdev->regions[index], 469 return vfio_platform_write_mmio(&vdev->regions[index],
456 buf, count, off); 470 buf, count, off);
457 else if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_PIO) 471 else if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_PIO)
458 return -EINVAL; /* not implemented */ 472 return -EINVAL; /* not implemented */
@@ -539,6 +553,14 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
539 if (!vdev) 553 if (!vdev)
540 return -EINVAL; 554 return -EINVAL;
541 555
556 ret = device_property_read_string(dev, "compatible", &vdev->compat);
557 if (ret) {
558 pr_err("VFIO: cannot retrieve compat for %s\n", vdev->name);
559 return -EINVAL;
560 }
561
562 vdev->device = dev;
563
542 group = iommu_group_get(dev); 564 group = iommu_group_get(dev);
543 if (!group) { 565 if (!group) {
544 pr_err("VFIO: No IOMMU group for device %s\n", vdev->name); 566 pr_err("VFIO: No IOMMU group for device %s\n", vdev->name);
@@ -551,7 +573,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
551 return ret; 573 return ret;
552 } 574 }
553 575
554 vfio_platform_get_reset(vdev, dev); 576 vfio_platform_get_reset(vdev);
555 577
556 mutex_init(&vdev->igate); 578 mutex_init(&vdev->igate);
557 579
@@ -573,3 +595,34 @@ struct vfio_platform_device *vfio_platform_remove_common(struct device *dev)
573 return vdev; 595 return vdev;
574} 596}
575EXPORT_SYMBOL_GPL(vfio_platform_remove_common); 597EXPORT_SYMBOL_GPL(vfio_platform_remove_common);
598
599void __vfio_platform_register_reset(struct vfio_platform_reset_node *node)
600{
601 mutex_lock(&driver_lock);
602 list_add(&node->link, &reset_list);
603 mutex_unlock(&driver_lock);
604}
605EXPORT_SYMBOL_GPL(__vfio_platform_register_reset);
606
607void vfio_platform_unregister_reset(const char *compat,
608 vfio_platform_reset_fn_t fn)
609{
610 struct vfio_platform_reset_node *iter, *temp;
611
612 mutex_lock(&driver_lock);
613 list_for_each_entry_safe(iter, temp, &reset_list, link) {
614 if (!strcmp(iter->compat, compat) && (iter->reset == fn)) {
615 list_del(&iter->link);
616 break;
617 }
618 }
619
620 mutex_unlock(&driver_lock);
621
622}
623EXPORT_SYMBOL_GPL(vfio_platform_unregister_reset);
624
625MODULE_VERSION(DRIVER_VERSION);
626MODULE_LICENSE("GPL v2");
627MODULE_AUTHOR(DRIVER_AUTHOR);
628MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c
index 88bba57b30a8..46d4750f43a8 100644
--- a/drivers/vfio/platform/vfio_platform_irq.c
+++ b/drivers/vfio/platform/vfio_platform_irq.c
@@ -185,6 +185,7 @@ static int vfio_set_trigger(struct vfio_platform_device *vdev, int index,
185 int ret; 185 int ret;
186 186
187 if (irq->trigger) { 187 if (irq->trigger) {
188 irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN);
188 free_irq(irq->hwirq, irq); 189 free_irq(irq->hwirq, irq);
189 kfree(irq->name); 190 kfree(irq->name);
190 eventfd_ctx_put(irq->trigger); 191 eventfd_ctx_put(irq->trigger);
diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h
index 1c9b3d59543c..42816dd280cb 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -56,6 +56,10 @@ struct vfio_platform_device {
56 u32 num_irqs; 56 u32 num_irqs;
57 int refcnt; 57 int refcnt;
58 struct mutex igate; 58 struct mutex igate;
59 struct module *parent_module;
60 const char *compat;
61 struct module *reset_module;
62 struct device *device;
59 63
60 /* 64 /*
61 * These fields should be filled by the bus specific binder 65 * These fields should be filled by the bus specific binder
@@ -70,10 +74,13 @@ struct vfio_platform_device {
70 int (*reset)(struct vfio_platform_device *vdev); 74 int (*reset)(struct vfio_platform_device *vdev);
71}; 75};
72 76
73struct vfio_platform_reset_combo { 77typedef int (*vfio_platform_reset_fn_t)(struct vfio_platform_device *vdev);
74 const char *compat; 78
75 const char *reset_function_name; 79struct vfio_platform_reset_node {
76 const char *module_name; 80 struct list_head link;
81 char *compat;
82 struct module *owner;
83 vfio_platform_reset_fn_t reset;
77}; 84};
78 85
79extern int vfio_platform_probe_common(struct vfio_platform_device *vdev, 86extern int vfio_platform_probe_common(struct vfio_platform_device *vdev,
@@ -89,4 +96,29 @@ extern int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev,
89 unsigned start, unsigned count, 96 unsigned start, unsigned count,
90 void *data); 97 void *data);
91 98
99extern void __vfio_platform_register_reset(struct vfio_platform_reset_node *n);
100extern void vfio_platform_unregister_reset(const char *compat,
101 vfio_platform_reset_fn_t fn);
102#define vfio_platform_register_reset(__compat, __reset) \
103static struct vfio_platform_reset_node __reset ## _node = { \
104 .owner = THIS_MODULE, \
105 .compat = __compat, \
106 .reset = __reset, \
107}; \
108__vfio_platform_register_reset(&__reset ## _node)
109
110#define module_vfio_reset_handler(compat, reset) \
111MODULE_ALIAS("vfio-reset:" compat); \
112static int __init reset ## _module_init(void) \
113{ \
114 vfio_platform_register_reset(compat, reset); \
115 return 0; \
116}; \
117static void __exit reset ## _module_exit(void) \
118{ \
119 vfio_platform_unregister_reset(compat, reset); \
120}; \
121module_init(reset ## _module_init); \
122module_exit(reset ## _module_exit)
123
92#endif /* VFIO_PLATFORM_PRIVATE_H */ 124#endif /* VFIO_PLATFORM_PRIVATE_H */
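The module_vfio_reset_handler() macro added above replaces the old hard-coded reset lookup table: a reset module registers its handler on the shared list at module init and advertises a "vfio-reset:<compat>" MODULE_ALIAS. A minimal sketch of such a module for a hypothetical "acme,widget" device follows; the compat string and WIDGET_RESET_REG are made up for illustration, only the registration pattern comes from this series (vfio_platform_amdxgbe.c above is the real in-tree user).

/* Hedged sketch of a reset module using the new registration API. */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/io.h>

#include "vfio_platform_private.h"

#define WIDGET_RESET_REG	0x04	/* hypothetical soft-reset register */

static int vfio_platform_acme_widget_reset(struct vfio_platform_device *vdev)
{
	struct vfio_platform_region *reg = &vdev->regions[0];

	if (!reg->ioaddr) {
		reg->ioaddr = ioremap_nocache(reg->addr, reg->size);
		if (!reg->ioaddr)
			return -ENOMEM;
	}

	writel(1, reg->ioaddr + WIDGET_RESET_REG);	/* assert soft reset */

	return 0;
}

module_vfio_reset_handler("acme,widget", vfio_platform_acme_widget_reset);

MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Example sketch of a vfio platform reset handler");

Because the macro also emits MODULE_ALIAS("vfio-reset:acme,widget"), the request_module("vfio-reset:%s") call added to vfio_platform_get_reset() can auto-load this module the first time a device with a matching compatible string is bound.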
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 563c510f285c..de632da2e22f 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -25,6 +25,7 @@
25#include <linux/miscdevice.h> 25#include <linux/miscdevice.h>
26#include <linux/module.h> 26#include <linux/module.h>
27#include <linux/mutex.h> 27#include <linux/mutex.h>
28#include <linux/pci.h>
28#include <linux/rwsem.h> 29#include <linux/rwsem.h>
29#include <linux/sched.h> 30#include <linux/sched.h>
30#include <linux/slab.h> 31#include <linux/slab.h>
@@ -61,6 +62,7 @@ struct vfio_container {
61 struct rw_semaphore group_lock; 62 struct rw_semaphore group_lock;
62 struct vfio_iommu_driver *iommu_driver; 63 struct vfio_iommu_driver *iommu_driver;
63 void *iommu_data; 64 void *iommu_data;
65 bool noiommu;
64}; 66};
65 67
66struct vfio_unbound_dev { 68struct vfio_unbound_dev {
@@ -83,6 +85,7 @@ struct vfio_group {
83 struct list_head unbound_list; 85 struct list_head unbound_list;
84 struct mutex unbound_lock; 86 struct mutex unbound_lock;
85 atomic_t opened; 87 atomic_t opened;
88 bool noiommu;
86}; 89};
87 90
88struct vfio_device { 91struct vfio_device {
@@ -94,6 +97,147 @@ struct vfio_device {
94 void *device_data; 97 void *device_data;
95}; 98};
96 99
100#ifdef CONFIG_VFIO_NOIOMMU
101static bool noiommu __read_mostly;
102module_param_named(enable_unsafe_noiommu_support,
103 noiommu, bool, S_IRUGO | S_IWUSR);
104MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
105#endif
106
107/*
108 * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
109 * and remove functions, any use cases other than acquiring the first
110 * reference for the purpose of calling vfio_add_group_dev() or removing
111 * that symmetric reference after vfio_del_group_dev() should use the raw
112 * iommu_group_{get,put} functions. In particular, vfio_iommu_group_put()
113 * removes the device from the dummy group and cannot be nested.
114 */
115struct iommu_group *vfio_iommu_group_get(struct device *dev)
116{
117 struct iommu_group *group;
118 int __maybe_unused ret;
119
120 group = iommu_group_get(dev);
121
122#ifdef CONFIG_VFIO_NOIOMMU
123 /*
124 * With noiommu enabled, an IOMMU group will be created for a device
125 * that doesn't already have one and doesn't have an iommu_ops on their
126 * bus. We use iommu_present() again in the main code to detect these
127 * fake groups.
128 */
129 if (group || !noiommu || iommu_present(dev->bus))
130 return group;
131
132 group = iommu_group_alloc();
133 if (IS_ERR(group))
134 return NULL;
135
136 iommu_group_set_name(group, "vfio-noiommu");
137 ret = iommu_group_add_device(group, dev);
138 iommu_group_put(group);
139 if (ret)
140 return NULL;
141
142 /*
143 * Where to taint? At this point we've added an IOMMU group for a
144 * device that is not backed by iommu_ops, therefore any iommu_
145 * callback using iommu_ops can legitimately Oops. So, while we may
146 * be about to give a DMA capable device to a user without IOMMU
147 * protection, which is clearly taint-worthy, let's go ahead and do
148 * it here.
149 */
150 add_taint(TAINT_USER, LOCKDEP_STILL_OK);
151 dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
152#endif
153
154 return group;
155}
156EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
157
158void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
159{
160#ifdef CONFIG_VFIO_NOIOMMU
161 if (!iommu_present(dev->bus))
162 iommu_group_remove_device(dev);
163#endif
164
165 iommu_group_put(group);
166}
167EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
168
169#ifdef CONFIG_VFIO_NOIOMMU
170static void *vfio_noiommu_open(unsigned long arg)
171{
172 if (arg != VFIO_NOIOMMU_IOMMU)
173 return ERR_PTR(-EINVAL);
174 if (!capable(CAP_SYS_RAWIO))
175 return ERR_PTR(-EPERM);
176
177 return NULL;
178}
179
180static void vfio_noiommu_release(void *iommu_data)
181{
182}
183
184static long vfio_noiommu_ioctl(void *iommu_data,
185 unsigned int cmd, unsigned long arg)
186{
187 if (cmd == VFIO_CHECK_EXTENSION)
188 return arg == VFIO_NOIOMMU_IOMMU ? 1 : 0;
189
190 return -ENOTTY;
191}
192
193static int vfio_iommu_present(struct device *dev, void *unused)
194{
195 return iommu_present(dev->bus) ? 1 : 0;
196}
197
198static int vfio_noiommu_attach_group(void *iommu_data,
199 struct iommu_group *iommu_group)
200{
201 return iommu_group_for_each_dev(iommu_group, NULL,
202 vfio_iommu_present) ? -EINVAL : 0;
203}
204
205static void vfio_noiommu_detach_group(void *iommu_data,
206 struct iommu_group *iommu_group)
207{
208}
209
210static struct vfio_iommu_driver_ops vfio_noiommu_ops = {
211 .name = "vfio-noiommu",
212 .owner = THIS_MODULE,
213 .open = vfio_noiommu_open,
214 .release = vfio_noiommu_release,
215 .ioctl = vfio_noiommu_ioctl,
216 .attach_group = vfio_noiommu_attach_group,
217 .detach_group = vfio_noiommu_detach_group,
218};
219
220static struct vfio_iommu_driver vfio_noiommu_driver = {
221 .ops = &vfio_noiommu_ops,
222};
223
224/*
225 * Wrap IOMMU drivers, the noiommu driver is the one and only driver for
226 * noiommu groups (and thus containers) and not available for normal groups.
227 */
228#define vfio_for_each_iommu_driver(con, pos) \
229 for (pos = con->noiommu ? &vfio_noiommu_driver : \
230 list_first_entry(&vfio.iommu_drivers_list, \
231 struct vfio_iommu_driver, vfio_next); \
232 (con->noiommu ? pos != NULL : \
233 &pos->vfio_next != &vfio.iommu_drivers_list); \
234 pos = con->noiommu ? NULL : list_next_entry(pos, vfio_next))
235#else
236#define vfio_for_each_iommu_driver(con, pos) \
237 list_for_each_entry(pos, &vfio.iommu_drivers_list, vfio_next)
238#endif
239
240
97/** 241/**
98 * IOMMU driver registration 242 * IOMMU driver registration
99 */ 243 */
@@ -198,7 +342,8 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
198/** 342/**
199 * Group objects - create, release, get, put, search 343 * Group objects - create, release, get, put, search
200 */ 344 */
201static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) 345static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
346 bool noiommu)
202{ 347{
203 struct vfio_group *group, *tmp; 348 struct vfio_group *group, *tmp;
204 struct device *dev; 349 struct device *dev;
@@ -216,6 +361,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
216 atomic_set(&group->container_users, 0); 361 atomic_set(&group->container_users, 0);
217 atomic_set(&group->opened, 0); 362 atomic_set(&group->opened, 0);
218 group->iommu_group = iommu_group; 363 group->iommu_group = iommu_group;
364 group->noiommu = noiommu;
219 365
220 group->nb.notifier_call = vfio_iommu_group_notifier; 366 group->nb.notifier_call = vfio_iommu_group_notifier;
221 367
@@ -251,7 +397,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
251 397
252 dev = device_create(vfio.class, NULL, 398 dev = device_create(vfio.class, NULL,
253 MKDEV(MAJOR(vfio.group_devt), minor), 399 MKDEV(MAJOR(vfio.group_devt), minor),
254 group, "%d", iommu_group_id(iommu_group)); 400 group, "%s%d", noiommu ? "noiommu-" : "",
401 iommu_group_id(iommu_group));
255 if (IS_ERR(dev)) { 402 if (IS_ERR(dev)) {
256 vfio_free_group_minor(minor); 403 vfio_free_group_minor(minor);
257 vfio_group_unlock_and_free(group); 404 vfio_group_unlock_and_free(group);
@@ -438,16 +585,33 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
438} 585}
439 586
440/* 587/*
441 * Whitelist some drivers that we know are safe (no dma) or just sit on 588 * Some drivers, like pci-stub, are only used to prevent other drivers from
442 * a device. It's not always practical to leave a device within a group 589 * claiming a device and are therefore perfectly legitimate for a user owned
443 * driverless as it could get re-bound to something unsafe. 590 * group. The pci-stub driver has no dependencies on DMA or the IOVA mapping
591 * of the device, but it does prevent the user from having direct access to
592 * the device, which is useful in some circumstances.
593 *
594 * We also assume that we can include PCI interconnect devices, ie. bridges.
595 * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
596 * then all of the downstream devices will be part of the same IOMMU group as
597 * the bridge. Thus, if placing the bridge into the user owned IOVA space
598 * breaks anything, it only does so for user owned devices downstream. Note
599 * that error notification via MSI can be affected for platforms that handle
600 * MSI within the same IOVA space as DMA.
444 */ 601 */
445static const char * const vfio_driver_whitelist[] = { "pci-stub", "pcieport" }; 602static const char * const vfio_driver_whitelist[] = { "pci-stub" };
446 603
447static bool vfio_whitelisted_driver(struct device_driver *drv) 604static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
448{ 605{
449 int i; 606 int i;
450 607
608 if (dev_is_pci(dev)) {
609 struct pci_dev *pdev = to_pci_dev(dev);
610
611 if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
612 return true;
613 }
614
451 for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) { 615 for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
452 if (!strcmp(drv->name, vfio_driver_whitelist[i])) 616 if (!strcmp(drv->name, vfio_driver_whitelist[i]))
453 return true; 617 return true;
@@ -462,6 +626,7 @@ static bool vfio_whitelisted_driver(struct device_driver *drv)
462 * - driver-less 626 * - driver-less
463 * - bound to a vfio driver 627 * - bound to a vfio driver
464 * - bound to a whitelisted driver 628 * - bound to a whitelisted driver
629 * - a PCI interconnect device
465 * 630 *
466 * We use two methods to determine whether a device is bound to a vfio 631 * We use two methods to determine whether a device is bound to a vfio
467 * driver. The first is to test whether the device exists in the vfio 632 * driver. The first is to test whether the device exists in the vfio
@@ -486,7 +651,7 @@ static int vfio_dev_viable(struct device *dev, void *data)
486 } 651 }
487 mutex_unlock(&group->unbound_lock); 652 mutex_unlock(&group->unbound_lock);
488 653
489 if (!ret || !drv || vfio_whitelisted_driver(drv)) 654 if (!ret || !drv || vfio_dev_whitelisted(dev, drv))
490 return 0; 655 return 0;
491 656
492 device = vfio_group_get_device(group, dev); 657 device = vfio_group_get_device(group, dev);
@@ -621,7 +786,8 @@ int vfio_add_group_dev(struct device *dev,
621 786
622 group = vfio_group_get_from_iommu(iommu_group); 787 group = vfio_group_get_from_iommu(iommu_group);
623 if (!group) { 788 if (!group) {
624 group = vfio_create_group(iommu_group); 789 group = vfio_create_group(iommu_group,
790 !iommu_present(dev->bus));
625 if (IS_ERR(group)) { 791 if (IS_ERR(group)) {
626 iommu_group_put(iommu_group); 792 iommu_group_put(iommu_group);
627 return PTR_ERR(group); 793 return PTR_ERR(group);
@@ -692,11 +858,12 @@ EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);
692static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, 858static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
693 char *buf) 859 char *buf)
694{ 860{
695 struct vfio_device *device; 861 struct vfio_device *it, *device = NULL;
696 862
697 mutex_lock(&group->device_lock); 863 mutex_lock(&group->device_lock);
698 list_for_each_entry(device, &group->device_list, group_next) { 864 list_for_each_entry(it, &group->device_list, group_next) {
699 if (!strcmp(dev_name(device->dev), buf)) { 865 if (!strcmp(dev_name(it->dev), buf)) {
866 device = it;
700 vfio_device_get(device); 867 vfio_device_get(device);
701 break; 868 break;
702 } 869 }
@@ -832,8 +999,7 @@ static long vfio_ioctl_check_extension(struct vfio_container *container,
832 */ 999 */
833 if (!driver) { 1000 if (!driver) {
834 mutex_lock(&vfio.iommu_drivers_lock); 1001 mutex_lock(&vfio.iommu_drivers_lock);
835 list_for_each_entry(driver, &vfio.iommu_drivers_list, 1002 vfio_for_each_iommu_driver(container, driver) {
836 vfio_next) {
837 if (!try_module_get(driver->ops->owner)) 1003 if (!try_module_get(driver->ops->owner))
838 continue; 1004 continue;
839 1005
@@ -902,7 +1068,7 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
902 } 1068 }
903 1069
904 mutex_lock(&vfio.iommu_drivers_lock); 1070 mutex_lock(&vfio.iommu_drivers_lock);
905 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { 1071 vfio_for_each_iommu_driver(container, driver) {
906 void *data; 1072 void *data;
907 1073
908 if (!try_module_get(driver->ops->owner)) 1074 if (!try_module_get(driver->ops->owner))
@@ -1167,6 +1333,9 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
1167 if (atomic_read(&group->container_users)) 1333 if (atomic_read(&group->container_users))
1168 return -EINVAL; 1334 return -EINVAL;
1169 1335
1336 if (group->noiommu && !capable(CAP_SYS_RAWIO))
1337 return -EPERM;
1338
1170 f = fdget(container_fd); 1339 f = fdget(container_fd);
1171 if (!f.file) 1340 if (!f.file)
1172 return -EBADF; 1341 return -EBADF;
@@ -1182,6 +1351,13 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
1182 1351
1183 down_write(&container->group_lock); 1352 down_write(&container->group_lock);
1184 1353
1354 /* Real groups and fake groups cannot mix */
1355 if (!list_empty(&container->group_list) &&
1356 container->noiommu != group->noiommu) {
1357 ret = -EPERM;
1358 goto unlock_out;
1359 }
1360
1185 driver = container->iommu_driver; 1361 driver = container->iommu_driver;
1186 if (driver) { 1362 if (driver) {
1187 ret = driver->ops->attach_group(container->iommu_data, 1363 ret = driver->ops->attach_group(container->iommu_data,
@@ -1191,6 +1367,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
1191 } 1367 }
1192 1368
1193 group->container = container; 1369 group->container = container;
1370 container->noiommu = group->noiommu;
1194 list_add(&group->container_next, &container->group_list); 1371 list_add(&group->container_next, &container->group_list);
1195 1372
1196 /* Get a reference on the container and mark a user within the group */ 1373 /* Get a reference on the container and mark a user within the group */
@@ -1221,6 +1398,9 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
1221 !group->container->iommu_driver || !vfio_group_viable(group)) 1398 !group->container->iommu_driver || !vfio_group_viable(group))
1222 return -EINVAL; 1399 return -EINVAL;
1223 1400
1401 if (group->noiommu && !capable(CAP_SYS_RAWIO))
1402 return -EPERM;
1403
1224 device = vfio_device_get_from_name(group, buf); 1404 device = vfio_device_get_from_name(group, buf);
1225 if (!device) 1405 if (!device)
1226 return -ENODEV; 1406 return -ENODEV;
@@ -1263,6 +1443,10 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
1263 1443
1264 fd_install(ret, filep); 1444 fd_install(ret, filep);
1265 1445
1446 if (group->noiommu)
1447 dev_warn(device->dev, "vfio-noiommu device opened by user "
1448 "(%s:%d)\n", current->comm, task_pid_nr(current));
1449
1266 return ret; 1450 return ret;
1267} 1451}
1268 1452
@@ -1351,6 +1535,11 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
1351 if (!group) 1535 if (!group)
1352 return -ENODEV; 1536 return -ENODEV;
1353 1537
1538 if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
1539 vfio_group_put(group);
1540 return -EPERM;
1541 }
1542
1354 /* Do we need multiple instances of the group open? Seems not. */ 1543 /* Do we need multiple instances of the group open? Seems not. */
1355 opened = atomic_cmpxchg(&group->opened, 0, 1); 1544 opened = atomic_cmpxchg(&group->opened, 0, 1);
1356 if (opened) { 1545 if (opened) {
@@ -1513,6 +1702,11 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep)
1513 if (!atomic_inc_not_zero(&group->container_users)) 1702 if (!atomic_inc_not_zero(&group->container_users))
1514 return ERR_PTR(-EINVAL); 1703 return ERR_PTR(-EINVAL);
1515 1704
1705 if (group->noiommu) {
1706 atomic_dec(&group->container_users);
1707 return ERR_PTR(-EPERM);
1708 }
1709
1516 if (!group->container->iommu_driver || 1710 if (!group->container->iommu_driver ||
1517 !vfio_group_viable(group)) { 1711 !vfio_group_viable(group)) {
1518 atomic_dec(&group->container_users); 1712 atomic_dec(&group->container_users);
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 57d8c37a002b..59d47cb638d5 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -403,13 +403,26 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
403static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) 403static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
404{ 404{
405 struct vfio_domain *domain; 405 struct vfio_domain *domain;
406 unsigned long bitmap = PAGE_MASK; 406 unsigned long bitmap = ULONG_MAX;
407 407
408 mutex_lock(&iommu->lock); 408 mutex_lock(&iommu->lock);
409 list_for_each_entry(domain, &iommu->domain_list, next) 409 list_for_each_entry(domain, &iommu->domain_list, next)
410 bitmap &= domain->domain->ops->pgsize_bitmap; 410 bitmap &= domain->domain->ops->pgsize_bitmap;
411 mutex_unlock(&iommu->lock); 411 mutex_unlock(&iommu->lock);
412 412
413 /*
414 * In case the IOMMU supports page sizes smaller than PAGE_SIZE
415 * we pretend PAGE_SIZE is supported and hide sub-PAGE_SIZE sizes.
416 * That way the user will be able to map/unmap buffers whose size/
417 * start address is aligned with PAGE_SIZE. Pinning code uses that
418 * granularity while iommu driver can use the sub-PAGE_SIZE size
419 * to map the buffer.
420 */
421 if (bitmap & ~PAGE_MASK) {
422 bitmap &= PAGE_MASK;
423 bitmap |= PAGE_SIZE;
424 }
425
413 return bitmap; 426 return bitmap;
414} 427}
415 428
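The type1 fixup above targets setups where the IOMMU granule is smaller than the CPU PAGE_SIZE; with the old PAGE_MASK seed, such a domain could mask away every advertised bit and leave no usable mapping size. A small sketch with assumed example values (64 KiB kernel pages, an IOMMU that only reports a 4 KiB granule) shows the effect of the new masking:

#include <stdio.h>

int main(void)
{
	/* Assumed example values mirroring the kernel names. */
	unsigned long page_size = 0x10000;              /* PAGE_SIZE = 64 KiB   */
	unsigned long page_mask = ~(page_size - 1);     /* PAGE_MASK            */
	unsigned long bitmap    = 0x1000;               /* domain pgsize_bitmap */

	if (bitmap & ~page_mask) {      /* sub-PAGE_SIZE sizes present   */
		bitmap &= page_mask;    /* drop them: 0x1000 -> 0        */
		bitmap |= page_size;    /* advertise PAGE_SIZE instead   */
	}

	/* Prints 0x10000: userspace maps/unmaps at PAGE_SIZE granularity
	 * while the IOMMU driver remains free to use 4 KiB pages internally. */
	printf("pgsize_bitmap = 0x%lx\n", bitmap);
	return 0;
}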
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index ddb440975382..610a86a892b8 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -44,6 +44,9 @@ struct vfio_device_ops {
44 void (*request)(void *device_data, unsigned int count); 44 void (*request)(void *device_data, unsigned int count);
45}; 45};
46 46
47extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
48extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
49
47extern int vfio_add_group_dev(struct device *dev, 50extern int vfio_add_group_dev(struct device *dev,
48 const struct vfio_device_ops *ops, 51 const struct vfio_device_ops *ops,
49 void *device_data); 52 void *device_data);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 9fd7b5d8df2f..751b69f858c8 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -39,6 +39,13 @@
39#define VFIO_SPAPR_TCE_v2_IOMMU 7 39#define VFIO_SPAPR_TCE_v2_IOMMU 7
40 40
41/* 41/*
42 * The No-IOMMU IOMMU offers no translation or isolation for devices and
43 * supports no ioctls outside of VFIO_CHECK_EXTENSION. Use of VFIO's No-IOMMU
44 * code will taint the host kernel and should be used with extreme caution.
45 */
46#define VFIO_NOIOMMU_IOMMU 8
47
48/*
42 * The IOCTL interface is designed for extensibility by embedding the 49 * The IOCTL interface is designed for extensibility by embedding the
43 * structure length (argsz) and flags into structures passed between 50 * structure length (argsz) and flags into structures passed between
44 * kernel and userspace. We therefore use the _IO() macro for these 51 * kernel and userspace. We therefore use the _IO() macro for these
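Tying the pieces together, a hedged sketch of how a privileged user would exercise the new no-IOMMU path: the group node name comes from the "noiommu-" prefix added in vfio.c, every step is gated by CAP_SYS_RAWIO, and the first device open warns and taints the kernel. The group number and device name below are placeholders; there is deliberately no DMA mapping ioctl, since in this mode the user owns all DMA addressing.

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Sketch only: "/dev/vfio/noiommu-42" and "0000:01:00.0" are placeholders. */
int open_noiommu_device(void)
{
	int container = open("/dev/vfio/vfio", O_RDWR);
	int group = open("/dev/vfio/noiommu-42", O_RDWR);

	if (container < 0 || group < 0)
		return -1;

	/* Real and no-iommu groups cannot share a container. */
	if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container))
		return -1;

	/* The no-iommu backend answers VFIO_CHECK_EXTENSION and nothing else. */
	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_NOIOMMU_IOMMU))
		return -1;

	if (ioctl(container, VFIO_SET_IOMMU, VFIO_NOIOMMU_IOMMU))
		return -1;

	/* Region and interrupt ioctls work as usual on the returned fd. */
	return ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:01:00.0");
}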