diff options
-rw-r--r-- | drivers/vfio/Kconfig | 6 | ||||
-rw-r--r-- | drivers/vfio/Makefile | 2 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 161 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_private.h | 3 | ||||
-rw-r--r-- | drivers/vfio/vfio_spapr_eeh.c | 17 | ||||
-rw-r--r-- | include/linux/vfio.h | 6 |
6 files changed, 170 insertions, 25 deletions
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index af7b204b9215..d8c57636b9ce 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig | |||
@@ -8,11 +8,17 @@ config VFIO_IOMMU_SPAPR_TCE | |||
8 | depends on VFIO && SPAPR_TCE_IOMMU | 8 | depends on VFIO && SPAPR_TCE_IOMMU |
9 | default n | 9 | default n |
10 | 10 | ||
11 | config VFIO_SPAPR_EEH | ||
12 | tristate | ||
13 | depends on EEH && VFIO_IOMMU_SPAPR_TCE | ||
14 | default n | ||
15 | |||
11 | menuconfig VFIO | 16 | menuconfig VFIO |
12 | tristate "VFIO Non-Privileged userspace driver framework" | 17 | tristate "VFIO Non-Privileged userspace driver framework" |
13 | depends on IOMMU_API | 18 | depends on IOMMU_API |
14 | select VFIO_IOMMU_TYPE1 if X86 | 19 | select VFIO_IOMMU_TYPE1 if X86 |
15 | select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) | 20 | select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) |
21 | select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES) | ||
16 | select ANON_INODES | 22 | select ANON_INODES |
17 | help | 23 | help |
18 | VFIO provides a framework for secure userspace device drivers. | 24 | VFIO provides a framework for secure userspace device drivers. |
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 50e30bc75e85..0b035b12600a 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile | |||
@@ -1,5 +1,5 @@ | |||
1 | obj-$(CONFIG_VFIO) += vfio.o | 1 | obj-$(CONFIG_VFIO) += vfio.o |
2 | obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o | 2 | obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o |
3 | obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o | 3 | obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o |
4 | obj-$(CONFIG_EEH) += vfio_spapr_eeh.o | 4 | obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o |
5 | obj-$(CONFIG_VFIO_PCI) += pci/ | 5 | obj-$(CONFIG_VFIO_PCI) += pci/ |
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index e2ee80f36e3e..f7825332a325 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c | |||
@@ -37,6 +37,10 @@ module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR); | |||
37 | MODULE_PARM_DESC(nointxmask, | 37 | MODULE_PARM_DESC(nointxmask, |
38 | "Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag."); | 38 | "Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag."); |
39 | 39 | ||
40 | static DEFINE_MUTEX(driver_lock); | ||
41 | |||
42 | static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); | ||
43 | |||
40 | static int vfio_pci_enable(struct vfio_pci_device *vdev) | 44 | static int vfio_pci_enable(struct vfio_pci_device *vdev) |
41 | { | 45 | { |
42 | struct pci_dev *pdev = vdev->pdev; | 46 | struct pci_dev *pdev = vdev->pdev; |
@@ -44,6 +48,9 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) | |||
44 | u16 cmd; | 48 | u16 cmd; |
45 | u8 msix_pos; | 49 | u8 msix_pos; |
46 | 50 | ||
51 | /* Don't allow our initial saved state to include busmaster */ | ||
52 | pci_clear_master(pdev); | ||
53 | |||
47 | ret = pci_enable_device(pdev); | 54 | ret = pci_enable_device(pdev); |
48 | if (ret) | 55 | if (ret) |
49 | return ret; | 56 | return ret; |
@@ -99,7 +106,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) | |||
99 | struct pci_dev *pdev = vdev->pdev; | 106 | struct pci_dev *pdev = vdev->pdev; |
100 | int bar; | 107 | int bar; |
101 | 108 | ||
102 | pci_disable_device(pdev); | 109 | /* Stop the device from further DMA */ |
110 | pci_clear_master(pdev); | ||
103 | 111 | ||
104 | vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE | | 112 | vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE | |
105 | VFIO_IRQ_SET_ACTION_TRIGGER, | 113 | VFIO_IRQ_SET_ACTION_TRIGGER, |
@@ -117,6 +125,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) | |||
117 | vdev->barmap[bar] = NULL; | 125 | vdev->barmap[bar] = NULL; |
118 | } | 126 | } |
119 | 127 | ||
128 | vdev->needs_reset = true; | ||
129 | |||
120 | /* | 130 | /* |
121 | * If we have saved state, restore it. If we can reset the device, | 131 | * If we have saved state, restore it. If we can reset the device, |
122 | * even better. Resetting with current state seems better than | 132 | * even better. Resetting with current state seems better than |
@@ -128,7 +138,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) | |||
128 | __func__, dev_name(&pdev->dev)); | 138 | __func__, dev_name(&pdev->dev)); |
129 | 139 | ||
130 | if (!vdev->reset_works) | 140 | if (!vdev->reset_works) |
131 | return; | 141 | goto out; |
132 | 142 | ||
133 | pci_save_state(pdev); | 143 | pci_save_state(pdev); |
134 | } | 144 | } |
@@ -148,46 +158,55 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) | |||
148 | if (ret) | 158 | if (ret) |
149 | pr_warn("%s: Failed to reset device %s (%d)\n", | 159 | pr_warn("%s: Failed to reset device %s (%d)\n", |
150 | __func__, dev_name(&pdev->dev), ret); | 160 | __func__, dev_name(&pdev->dev), ret); |
161 | else | ||
162 | vdev->needs_reset = false; | ||
151 | } | 163 | } |
152 | 164 | ||
153 | pci_restore_state(pdev); | 165 | pci_restore_state(pdev); |
166 | out: | ||
167 | pci_disable_device(pdev); | ||
168 | |||
169 | vfio_pci_try_bus_reset(vdev); | ||
154 | } | 170 | } |
155 | 171 | ||
156 | static void vfio_pci_release(void *device_data) | 172 | static void vfio_pci_release(void *device_data) |
157 | { | 173 | { |
158 | struct vfio_pci_device *vdev = device_data; | 174 | struct vfio_pci_device *vdev = device_data; |
159 | 175 | ||
160 | if (atomic_dec_and_test(&vdev->refcnt)) { | 176 | mutex_lock(&driver_lock); |
177 | |||
178 | if (!(--vdev->refcnt)) { | ||
161 | vfio_spapr_pci_eeh_release(vdev->pdev); | 179 | vfio_spapr_pci_eeh_release(vdev->pdev); |
162 | vfio_pci_disable(vdev); | 180 | vfio_pci_disable(vdev); |
163 | } | 181 | } |
164 | 182 | ||
183 | mutex_unlock(&driver_lock); | ||
184 | |||
165 | module_put(THIS_MODULE); | 185 | module_put(THIS_MODULE); |
166 | } | 186 | } |
167 | 187 | ||
168 | static int vfio_pci_open(void *device_data) | 188 | static int vfio_pci_open(void *device_data) |
169 | { | 189 | { |
170 | struct vfio_pci_device *vdev = device_data; | 190 | struct vfio_pci_device *vdev = device_data; |
171 | int ret; | 191 | int ret = 0; |
172 | 192 | ||
173 | if (!try_module_get(THIS_MODULE)) | 193 | if (!try_module_get(THIS_MODULE)) |
174 | return -ENODEV; | 194 | return -ENODEV; |
175 | 195 | ||
176 | if (atomic_inc_return(&vdev->refcnt) == 1) { | 196 | mutex_lock(&driver_lock); |
197 | |||
198 | if (!vdev->refcnt) { | ||
177 | ret = vfio_pci_enable(vdev); | 199 | ret = vfio_pci_enable(vdev); |
178 | if (ret) | 200 | if (ret) |
179 | goto error; | 201 | goto error; |
180 | 202 | ||
181 | ret = vfio_spapr_pci_eeh_open(vdev->pdev); | 203 | vfio_spapr_pci_eeh_open(vdev->pdev); |
182 | if (ret) { | ||
183 | vfio_pci_disable(vdev); | ||
184 | goto error; | ||
185 | } | ||
186 | } | 204 | } |
187 | 205 | vdev->refcnt++; | |
188 | return 0; | ||
189 | error: | 206 | error: |
190 | module_put(THIS_MODULE); | 207 | mutex_unlock(&driver_lock); |
208 | if (ret) | ||
209 | module_put(THIS_MODULE); | ||
191 | return ret; | 210 | return ret; |
192 | } | 211 | } |
193 | 212 | ||
@@ -843,7 +862,6 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
843 | vdev->irq_type = VFIO_PCI_NUM_IRQS; | 862 | vdev->irq_type = VFIO_PCI_NUM_IRQS; |
844 | mutex_init(&vdev->igate); | 863 | mutex_init(&vdev->igate); |
845 | spin_lock_init(&vdev->irqlock); | 864 | spin_lock_init(&vdev->irqlock); |
846 | atomic_set(&vdev->refcnt, 0); | ||
847 | 865 | ||
848 | ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); | 866 | ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); |
849 | if (ret) { | 867 | if (ret) { |
@@ -858,12 +876,15 @@ static void vfio_pci_remove(struct pci_dev *pdev) | |||
858 | { | 876 | { |
859 | struct vfio_pci_device *vdev; | 877 | struct vfio_pci_device *vdev; |
860 | 878 | ||
879 | mutex_lock(&driver_lock); | ||
880 | |||
861 | vdev = vfio_del_group_dev(&pdev->dev); | 881 | vdev = vfio_del_group_dev(&pdev->dev); |
862 | if (!vdev) | 882 | if (vdev) { |
863 | return; | 883 | iommu_group_put(pdev->dev.iommu_group); |
884 | kfree(vdev); | ||
885 | } | ||
864 | 886 | ||
865 | iommu_group_put(pdev->dev.iommu_group); | 887 | mutex_unlock(&driver_lock); |
866 | kfree(vdev); | ||
867 | } | 888 | } |
868 | 889 | ||
869 | static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, | 890 | static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, |
@@ -906,6 +927,110 @@ static struct pci_driver vfio_pci_driver = { | |||
906 | .err_handler = &vfio_err_handlers, | 927 | .err_handler = &vfio_err_handlers, |
907 | }; | 928 | }; |
908 | 929 | ||
930 | /* | ||
931 | * Test whether a reset is necessary and possible. We mark devices as | ||
932 | * needs_reset when they are released, but don't have a function-local reset | ||
933 | * available. If any of these exist in the affected devices, we want to do | ||
934 | * a bus/slot reset. We also need all of the affected devices to be unused, | ||
935 | * so we abort if any device has a non-zero refcnt. driver_lock prevents a | ||
936 | * device from being opened during the scan or unbound from vfio-pci. | ||
937 | */ | ||
938 | static int vfio_pci_test_bus_reset(struct pci_dev *pdev, void *data) | ||
939 | { | ||
940 | bool *needs_reset = data; | ||
941 | struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); | ||
942 | int ret = -EBUSY; | ||
943 | |||
944 | if (pci_drv == &vfio_pci_driver) { | ||
945 | struct vfio_device *device; | ||
946 | struct vfio_pci_device *vdev; | ||
947 | |||
948 | device = vfio_device_get_from_dev(&pdev->dev); | ||
949 | if (!device) | ||
950 | return ret; | ||
951 | |||
952 | vdev = vfio_device_data(device); | ||
953 | if (vdev) { | ||
954 | if (vdev->needs_reset) | ||
955 | *needs_reset = true; | ||
956 | |||
957 | if (!vdev->refcnt) | ||
958 | ret = 0; | ||
959 | } | ||
960 | |||
961 | vfio_device_put(device); | ||
962 | } | ||
963 | |||
964 | /* | ||
965 | * TODO: vfio-core considers groups to be viable even if some devices | ||
966 | * are attached to known drivers, like pci-stub or pcieport. We can't | ||
967 | * freeze devices from being unbound to those drivers like we can | ||
968 | * here though, so it would be racy to test for them. We also can't | ||
969 | * use device_lock() to prevent changes as that would interfere with | ||
970 | * PCI-core taking device_lock during bus reset. For now, we require | ||
971 | * devices to be bound to vfio-pci to get a bus/slot reset on release. | ||
972 | */ | ||
973 | |||
974 | return ret; | ||
975 | } | ||
976 | |||
977 | /* Clear needs_reset on all affected devices after successful bus/slot reset */ | ||
978 | static int vfio_pci_clear_needs_reset(struct pci_dev *pdev, void *data) | ||
979 | { | ||
980 | struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); | ||
981 | |||
982 | if (pci_drv == &vfio_pci_driver) { | ||
983 | struct vfio_device *device; | ||
984 | struct vfio_pci_device *vdev; | ||
985 | |||
986 | device = vfio_device_get_from_dev(&pdev->dev); | ||
987 | if (!device) | ||
988 | return 0; | ||
989 | |||
990 | vdev = vfio_device_data(device); | ||
991 | if (vdev) | ||
992 | vdev->needs_reset = false; | ||
993 | |||
994 | vfio_device_put(device); | ||
995 | } | ||
996 | |||
997 | return 0; | ||
998 | } | ||
999 | |||
1000 | /* | ||
1001 | * Attempt to do a bus/slot reset if there are devices affected by a reset for | ||
1002 | * this device that are needs_reset and all of the affected devices are unused | ||
1003 | * (!refcnt). Callers of this function are required to hold driver_lock such | ||
1004 | * that devices can not be unbound from vfio-pci or opened by a user while we | ||
1005 | * test for and perform a bus/slot reset. | ||
1006 | */ | ||
1007 | static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) | ||
1008 | { | ||
1009 | bool needs_reset = false, slot = false; | ||
1010 | int ret; | ||
1011 | |||
1012 | if (!pci_probe_reset_slot(vdev->pdev->slot)) | ||
1013 | slot = true; | ||
1014 | else if (pci_probe_reset_bus(vdev->pdev->bus)) | ||
1015 | return; | ||
1016 | |||
1017 | if (vfio_pci_for_each_slot_or_bus(vdev->pdev, | ||
1018 | vfio_pci_test_bus_reset, | ||
1019 | &needs_reset, slot) || !needs_reset) | ||
1020 | return; | ||
1021 | |||
1022 | if (slot) | ||
1023 | ret = pci_try_reset_slot(vdev->pdev->slot); | ||
1024 | else | ||
1025 | ret = pci_try_reset_bus(vdev->pdev->bus); | ||
1026 | |||
1027 | if (ret) | ||
1028 | return; | ||
1029 | |||
1030 | vfio_pci_for_each_slot_or_bus(vdev->pdev, | ||
1031 | vfio_pci_clear_needs_reset, NULL, slot); | ||
1032 | } | ||
1033 | |||
909 | static void __exit vfio_pci_cleanup(void) | 1034 | static void __exit vfio_pci_cleanup(void) |
910 | { | 1035 | { |
911 | pci_unregister_driver(&vfio_pci_driver); | 1036 | pci_unregister_driver(&vfio_pci_driver); |
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 9c6d5d0f3b02..671c17a6e6d0 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h | |||
@@ -54,8 +54,9 @@ struct vfio_pci_device { | |||
54 | bool extended_caps; | 54 | bool extended_caps; |
55 | bool bardirty; | 55 | bool bardirty; |
56 | bool has_vga; | 56 | bool has_vga; |
57 | bool needs_reset; | ||
57 | struct pci_saved_state *pci_saved_state; | 58 | struct pci_saved_state *pci_saved_state; |
58 | atomic_t refcnt; | 59 | int refcnt; |
59 | struct eventfd_ctx *err_trigger; | 60 | struct eventfd_ctx *err_trigger; |
60 | }; | 61 | }; |
61 | 62 | ||
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index f834b4ce1431..86dfceb9201f 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c | |||
@@ -9,20 +9,27 @@ | |||
9 | * published by the Free Software Foundation. | 9 | * published by the Free Software Foundation. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/module.h> | ||
12 | #include <linux/uaccess.h> | 13 | #include <linux/uaccess.h> |
13 | #include <linux/vfio.h> | 14 | #include <linux/vfio.h> |
14 | #include <asm/eeh.h> | 15 | #include <asm/eeh.h> |
15 | 16 | ||
17 | #define DRIVER_VERSION "0.1" | ||
18 | #define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" | ||
19 | #define DRIVER_DESC "VFIO IOMMU SPAPR EEH" | ||
20 | |||
16 | /* We might build address mapping here for "fast" path later */ | 21 | /* We might build address mapping here for "fast" path later */ |
17 | int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) | 22 | void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) |
18 | { | 23 | { |
19 | return eeh_dev_open(pdev); | 24 | eeh_dev_open(pdev); |
20 | } | 25 | } |
26 | EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open); | ||
21 | 27 | ||
22 | void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) | 28 | void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) |
23 | { | 29 | { |
24 | eeh_dev_release(pdev); | 30 | eeh_dev_release(pdev); |
25 | } | 31 | } |
32 | EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release); | ||
26 | 33 | ||
27 | long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, | 34 | long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, |
28 | unsigned int cmd, unsigned long arg) | 35 | unsigned int cmd, unsigned long arg) |
@@ -85,3 +92,9 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, | |||
85 | 92 | ||
86 | return ret; | 93 | return ret; |
87 | } | 94 | } |
95 | EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl); | ||
96 | |||
97 | MODULE_VERSION(DRIVER_VERSION); | ||
98 | MODULE_LICENSE("GPL v2"); | ||
99 | MODULE_AUTHOR(DRIVER_AUTHOR); | ||
100 | MODULE_DESCRIPTION(DRIVER_DESC); | ||
diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 25a0fbd4b998..d3204115f15d 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h | |||
@@ -98,16 +98,16 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group); | |||
98 | extern long vfio_external_check_extension(struct vfio_group *group, | 98 | extern long vfio_external_check_extension(struct vfio_group *group, |
99 | unsigned long arg); | 99 | unsigned long arg); |
100 | 100 | ||
101 | struct pci_dev; | ||
101 | #ifdef CONFIG_EEH | 102 | #ifdef CONFIG_EEH |
102 | extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev); | 103 | extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); |
103 | extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); | 104 | extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); |
104 | extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, | 105 | extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, |
105 | unsigned int cmd, | 106 | unsigned int cmd, |
106 | unsigned long arg); | 107 | unsigned long arg); |
107 | #else | 108 | #else |
108 | static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) | 109 | static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) |
109 | { | 110 | { |
110 | return 0; | ||
111 | } | 111 | } |
112 | 112 | ||
113 | static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) | 113 | static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) |