aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlex Williamson <alex.williamson@redhat.com>2015-12-21 17:13:33 -0500
committerAlex Williamson <alex.williamson@redhat.com>2015-12-21 17:28:11 -0500
commit03a76b60f8ba27974e2d252bc555d2c103420e15 (patch)
treee22f21de57e39edab56b9761e17b202c747dec33
parent77d6bd47cc2824af016086c2bd4650685b159e22 (diff)
vfio: Include No-IOMMU mode
There is really no way to safely give a user full access to a DMA-capable device without an IOMMU to protect the host system. There is also no way to provide DMA translation for use cases such as device assignment to virtual machines. However, there are still users who want userspace drivers even under those conditions. The UIO driver exists for this use case, but does not provide the degree of device access and programming that VFIO has. In an effort to avoid code duplication, this patch introduces a No-IOMMU mode for VFIO. This mode requires building VFIO with CONFIG_VFIO_NOIOMMU and enabling the "enable_unsafe_noiommu_mode" option on the vfio driver. This should make it very clear that this mode is not safe. Additionally, CAP_SYS_RAWIO privileges are necessary to work with groups and containers using this mode. Groups making use of this support are named /dev/vfio/noiommu-$GROUP and can only make use of the special VFIO_NOIOMMU_IOMMU for the container. Use of this mode, specifically binding a device without a native IOMMU group to a VFIO bus driver, will taint the kernel and should therefore not be considered supported. This patch includes no-iommu support for the vfio-pci bus driver only. Signed-off-by: Alex Williamson <alex.williamson@redhat.com> Acked-by: Michael S. Tsirkin <mst@redhat.com>
-rw-r--r--drivers/vfio/Kconfig15
-rw-r--r--drivers/vfio/pci/vfio_pci.c8
-rw-r--r--drivers/vfio/vfio.c184
-rw-r--r--include/linux/vfio.h3
-rw-r--r--include/uapi/linux/vfio.h7
5 files changed, 210 insertions, 7 deletions
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 850d86ca685b..da6e2ce77495 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -31,6 +31,21 @@ menuconfig VFIO
31 31
32 If you don't know what to do here, say N. 32 If you don't know what to do here, say N.
33 33
34menuconfig VFIO_NOIOMMU
35 bool "VFIO No-IOMMU support"
36 depends on VFIO
37 help
38 VFIO is built on the ability to isolate devices using the IOMMU.
39 Only with an IOMMU can userspace access to DMA capable devices be
40 considered secure. VFIO No-IOMMU mode enables IOMMU groups for
41 devices without IOMMU backing for the purpose of re-using the VFIO
42 infrastructure in a non-secure mode. Use of this mode will result
43 in an unsupportable kernel and will therefore taint the kernel.
44 Device assignment to virtual machines is also not possible with
45 this mode since there is no IOMMU to provide DMA translation.
46
47 If you don't know what to do here, say N.
48
34source "drivers/vfio/pci/Kconfig" 49source "drivers/vfio/pci/Kconfig"
35source "drivers/vfio/platform/Kconfig" 50source "drivers/vfio/platform/Kconfig"
36source "virt/lib/Kconfig" 51source "virt/lib/Kconfig"
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 56bf6dbb93db..2760a7ba3f30 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -940,13 +940,13 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
940 if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) 940 if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
941 return -EINVAL; 941 return -EINVAL;
942 942
943 group = iommu_group_get(&pdev->dev); 943 group = vfio_iommu_group_get(&pdev->dev);
944 if (!group) 944 if (!group)
945 return -EINVAL; 945 return -EINVAL;
946 946
947 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); 947 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
948 if (!vdev) { 948 if (!vdev) {
949 iommu_group_put(group); 949 vfio_iommu_group_put(group, &pdev->dev);
950 return -ENOMEM; 950 return -ENOMEM;
951 } 951 }
952 952
@@ -957,7 +957,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
957 957
958 ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); 958 ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
959 if (ret) { 959 if (ret) {
960 iommu_group_put(group); 960 vfio_iommu_group_put(group, &pdev->dev);
961 kfree(vdev); 961 kfree(vdev);
962 return ret; 962 return ret;
963 } 963 }
@@ -993,7 +993,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
993 if (!vdev) 993 if (!vdev)
994 return; 994 return;
995 995
996 iommu_group_put(pdev->dev.iommu_group); 996 vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
997 kfree(vdev); 997 kfree(vdev);
998 998
999 if (vfio_pci_is_vga(pdev)) { 999 if (vfio_pci_is_vga(pdev)) {
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 6070b793cbcb..82f25cc1c460 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -62,6 +62,7 @@ struct vfio_container {
62 struct rw_semaphore group_lock; 62 struct rw_semaphore group_lock;
63 struct vfio_iommu_driver *iommu_driver; 63 struct vfio_iommu_driver *iommu_driver;
64 void *iommu_data; 64 void *iommu_data;
65 bool noiommu;
65}; 66};
66 67
67struct vfio_unbound_dev { 68struct vfio_unbound_dev {
@@ -84,6 +85,7 @@ struct vfio_group {
84 struct list_head unbound_list; 85 struct list_head unbound_list;
85 struct mutex unbound_lock; 86 struct mutex unbound_lock;
86 atomic_t opened; 87 atomic_t opened;
88 bool noiommu;
87}; 89};
88 90
89struct vfio_device { 91struct vfio_device {
@@ -95,6 +97,128 @@ struct vfio_device {
95 void *device_data; 97 void *device_data;
96}; 98};
97 99
100#ifdef CONFIG_VFIO_NOIOMMU
101static bool noiommu __read_mostly;
102module_param_named(enable_unsafe_noiommu_mode,
103 noiommu, bool, S_IRUGO | S_IWUSR);
104MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
105#endif
106
107/*
108 * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
109 * and remove functions; any use cases other than acquiring the first
110 * reference for the purpose of calling vfio_add_group_dev() or removing
111 * that symmetric reference after vfio_del_group_dev() should use the raw
112 * iommu_group_{get,put} functions. In particular, vfio_iommu_group_put()
113 * removes the device from the dummy group and cannot be nested.
114 */
115struct iommu_group *vfio_iommu_group_get(struct device *dev)
116{
117 struct iommu_group *group;
118 int __maybe_unused ret;
119
120 group = iommu_group_get(dev);
121
122#ifdef CONFIG_VFIO_NOIOMMU
123 /*
124 * With noiommu enabled, an IOMMU group will be created for a device
125 * that doesn't already have one and doesn't have an iommu_ops on its
126 * bus. We use iommu_present() again in the main code to detect these
127 * fake groups.
128 */
129 if (group || !noiommu || iommu_present(dev->bus))
130 return group;
131
132 group = iommu_group_alloc();
133 if (IS_ERR(group))
134 return NULL;
135
136 iommu_group_set_name(group, "vfio-noiommu");
137 ret = iommu_group_add_device(group, dev);
138 iommu_group_put(group);
139 if (ret)
140 return NULL;
141
142 /*
143 * Where to taint? At this point we've added an IOMMU group for a
144 * device that is not backed by iommu_ops, therefore any iommu_
145 * callback using iommu_ops can legitimately Oops. So, while we may
146 * be about to give a DMA capable device to a user without IOMMU
147 * protection, which is clearly taint-worthy, let's go ahead and do
148 * it here.
149 */
150 add_taint(TAINT_USER, LOCKDEP_STILL_OK);
151 dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
152#endif
153
154 return group;
155}
156EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
157
158void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
159{
160#ifdef CONFIG_VFIO_NOIOMMU
161 if (!iommu_present(dev->bus))
162 iommu_group_remove_device(dev);
163#endif
164
165 iommu_group_put(group);
166}
167EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
168
169#ifdef CONFIG_VFIO_NOIOMMU
170static void *vfio_noiommu_open(unsigned long arg)
171{
172 if (arg != VFIO_NOIOMMU_IOMMU)
173 return ERR_PTR(-EINVAL);
174 if (!capable(CAP_SYS_RAWIO))
175 return ERR_PTR(-EPERM);
176
177 return NULL;
178}
179
180static void vfio_noiommu_release(void *iommu_data)
181{
182}
183
184static long vfio_noiommu_ioctl(void *iommu_data,
185 unsigned int cmd, unsigned long arg)
186{
187 if (cmd == VFIO_CHECK_EXTENSION)
188 return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
189
190 return -ENOTTY;
191}
192
193static int vfio_iommu_present(struct device *dev, void *unused)
194{
195 return iommu_present(dev->bus) ? 1 : 0;
196}
197
198static int vfio_noiommu_attach_group(void *iommu_data,
199 struct iommu_group *iommu_group)
200{
201 return iommu_group_for_each_dev(iommu_group, NULL,
202 vfio_iommu_present) ? -EINVAL : 0;
203}
204
205static void vfio_noiommu_detach_group(void *iommu_data,
206 struct iommu_group *iommu_group)
207{
208}
209
210static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
211 .name = "vfio-noiommu",
212 .owner = THIS_MODULE,
213 .open = vfio_noiommu_open,
214 .release = vfio_noiommu_release,
215 .ioctl = vfio_noiommu_ioctl,
216 .attach_group = vfio_noiommu_attach_group,
217 .detach_group = vfio_noiommu_detach_group,
218};
219#endif
220
221
98/** 222/**
99 * IOMMU driver registration 223 * IOMMU driver registration
100 */ 224 */
@@ -199,7 +323,8 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
199/** 323/**
200 * Group objects - create, release, get, put, search 324 * Group objects - create, release, get, put, search
201 */ 325 */
202static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) 326static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
327 bool iommu_present)
203{ 328{
204 struct vfio_group *group, *tmp; 329 struct vfio_group *group, *tmp;
205 struct device *dev; 330 struct device *dev;
@@ -217,6 +342,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
217 atomic_set(&group->container_users, 0); 342 atomic_set(&group->container_users, 0);
218 atomic_set(&group->opened, 0); 343 atomic_set(&group->opened, 0);
219 group->iommu_group = iommu_group; 344 group->iommu_group = iommu_group;
345 group->noiommu = !iommu_present;
220 346
221 group->nb.notifier_call = vfio_iommu_group_notifier; 347 group->nb.notifier_call = vfio_iommu_group_notifier;
222 348
@@ -252,7 +378,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
252 378
253 dev = device_create(vfio.class, NULL, 379 dev = device_create(vfio.class, NULL,
254 MKDEV(MAJOR(vfio.group_devt), minor), 380 MKDEV(MAJOR(vfio.group_devt), minor),
255 group, "%d", iommu_group_id(iommu_group)); 381 group, "%s%d", group->noiommu ? "noiommu-" : "",
382 iommu_group_id(iommu_group));
256 if (IS_ERR(dev)) { 383 if (IS_ERR(dev)) {
257 vfio_free_group_minor(minor); 384 vfio_free_group_minor(minor);
258 vfio_group_unlock_and_free(group); 385 vfio_group_unlock_and_free(group);
@@ -640,7 +767,7 @@ int vfio_add_group_dev(struct device *dev,
640 767
641 group = vfio_group_get_from_iommu(iommu_group); 768 group = vfio_group_get_from_iommu(iommu_group);
642 if (!group) { 769 if (!group) {
643 group = vfio_create_group(iommu_group); 770 group = vfio_create_group(iommu_group, iommu_present(dev->bus));
644 if (IS_ERR(group)) { 771 if (IS_ERR(group)) {
645 iommu_group_put(iommu_group); 772 iommu_group_put(iommu_group);
646 return PTR_ERR(group); 773 return PTR_ERR(group);
@@ -854,6 +981,14 @@ static long vfio_ioctl_check_extension(struct vfio_container *container,
854 mutex_lock(&vfio.iommu_drivers_lock); 981 mutex_lock(&vfio.iommu_drivers_lock);
855 list_for_each_entry(driver, &vfio.iommu_drivers_list, 982 list_for_each_entry(driver, &vfio.iommu_drivers_list,
856 vfio_next) { 983 vfio_next) {
984
985#ifdef CONFIG_VFIO_NOIOMMU
986 if (!list_empty(&container->group_list) &&
987 (container->noiommu !=
988 (driver->ops == &vfio_noiommu_ops)))
989 continue;
990#endif
991
857 if (!try_module_get(driver->ops->owner)) 992 if (!try_module_get(driver->ops->owner))
858 continue; 993 continue;
859 994
@@ -925,6 +1060,15 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
925 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { 1060 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
926 void *data; 1061 void *data;
927 1062
1063#ifdef CONFIG_VFIO_NOIOMMU
1064 /*
1065 * Only noiommu containers can use vfio-noiommu and noiommu
1066 * containers can only use vfio-noiommu.
1067 */
1068 if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
1069 continue;
1070#endif
1071
928 if (!try_module_get(driver->ops->owner)) 1072 if (!try_module_get(driver->ops->owner))
929 continue; 1073 continue;
930 1074
@@ -1187,6 +1331,9 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
1187 if (atomic_read(&group->container_users)) 1331 if (atomic_read(&group->container_users))
1188 return -EINVAL; 1332 return -EINVAL;
1189 1333
1334 if (group->noiommu && !capable(CAP_SYS_RAWIO))
1335 return -EPERM;
1336
1190 f = fdget(container_fd); 1337 f = fdget(container_fd);
1191 if (!f.file) 1338 if (!f.file)
1192 return -EBADF; 1339 return -EBADF;
@@ -1202,6 +1349,13 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
1202 1349
1203 down_write(&container->group_lock); 1350 down_write(&container->group_lock);
1204 1351
1352 /* Real groups and fake groups cannot mix */
1353 if (!list_empty(&container->group_list) &&
1354 container->noiommu != group->noiommu) {
1355 ret = -EPERM;
1356 goto unlock_out;
1357 }
1358
1205 driver = container->iommu_driver; 1359 driver = container->iommu_driver;
1206 if (driver) { 1360 if (driver) {
1207 ret = driver->ops->attach_group(container->iommu_data, 1361 ret = driver->ops->attach_group(container->iommu_data,
@@ -1211,6 +1365,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
1211 } 1365 }
1212 1366
1213 group->container = container; 1367 group->container = container;
1368 container->noiommu = group->noiommu;
1214 list_add(&group->container_next, &container->group_list); 1369 list_add(&group->container_next, &container->group_list);
1215 1370
1216 /* Get a reference on the container and mark a user within the group */ 1371 /* Get a reference on the container and mark a user within the group */
@@ -1241,6 +1396,9 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
1241 !group->container->iommu_driver || !vfio_group_viable(group)) 1396 !group->container->iommu_driver || !vfio_group_viable(group))
1242 return -EINVAL; 1397 return -EINVAL;
1243 1398
1399 if (group->noiommu && !capable(CAP_SYS_RAWIO))
1400 return -EPERM;
1401
1244 device = vfio_device_get_from_name(group, buf); 1402 device = vfio_device_get_from_name(group, buf);
1245 if (!device) 1403 if (!device)
1246 return -ENODEV; 1404 return -ENODEV;
@@ -1283,6 +1441,10 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
1283 1441
1284 fd_install(ret, filep); 1442 fd_install(ret, filep);
1285 1443
1444 if (group->noiommu)
1445 dev_warn(device->dev, "vfio-noiommu device opened by user "
1446 "(%s:%d)\n", current->comm, task_pid_nr(current));
1447
1286 return ret; 1448 return ret;
1287} 1449}
1288 1450
@@ -1371,6 +1533,11 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
1371 if (!group) 1533 if (!group)
1372 return -ENODEV; 1534 return -ENODEV;
1373 1535
1536 if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
1537 vfio_group_put(group);
1538 return -EPERM;
1539 }
1540
1374 /* Do we need multiple instances of the group open? Seems not. */ 1541 /* Do we need multiple instances of the group open? Seems not. */
1375 opened = atomic_cmpxchg(&group->opened, 0, 1); 1542 opened = atomic_cmpxchg(&group->opened, 0, 1);
1376 if (opened) { 1543 if (opened) {
@@ -1533,6 +1700,11 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep)
1533 if (!atomic_inc_not_zero(&group->container_users)) 1700 if (!atomic_inc_not_zero(&group->container_users))
1534 return ERR_PTR(-EINVAL); 1701 return ERR_PTR(-EINVAL);
1535 1702
1703 if (group->noiommu) {
1704 atomic_dec(&group->container_users);
1705 return ERR_PTR(-EPERM);
1706 }
1707
1536 if (!group->container->iommu_driver || 1708 if (!group->container->iommu_driver ||
1537 !vfio_group_viable(group)) { 1709 !vfio_group_viable(group)) {
1538 atomic_dec(&group->container_users); 1710 atomic_dec(&group->container_users);
@@ -1625,6 +1797,9 @@ static int __init vfio_init(void)
1625 request_module_nowait("vfio_iommu_type1"); 1797 request_module_nowait("vfio_iommu_type1");
1626 request_module_nowait("vfio_iommu_spapr_tce"); 1798 request_module_nowait("vfio_iommu_spapr_tce");
1627 1799
1800#ifdef CONFIG_VFIO_NOIOMMU
1801 vfio_register_iommu_driver(&vfio_noiommu_ops);
1802#endif
1628 return 0; 1803 return 0;
1629 1804
1630err_cdev_add: 1805err_cdev_add:
@@ -1641,6 +1816,9 @@ static void __exit vfio_cleanup(void)
1641{ 1816{
1642 WARN_ON(!list_empty(&vfio.group_list)); 1817 WARN_ON(!list_empty(&vfio.group_list));
1643 1818
1819#ifdef CONFIG_VFIO_NOIOMMU
1820 vfio_unregister_iommu_driver(&vfio_noiommu_ops);
1821#endif
1644 idr_destroy(&vfio.group_idr); 1822 idr_destroy(&vfio.group_idr);
1645 cdev_del(&vfio.group_cdev); 1823 cdev_del(&vfio.group_cdev);
1646 unregister_chrdev_region(vfio.group_devt, MINORMASK); 1824 unregister_chrdev_region(vfio.group_devt, MINORMASK);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index ddb440975382..610a86a892b8 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -44,6 +44,9 @@ struct vfio_device_ops {
44 void (*request)(void *device_data, unsigned int count); 44 void (*request)(void *device_data, unsigned int count);
45}; 45};
46 46
47extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
48extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
49
47extern int vfio_add_group_dev(struct device *dev, 50extern int vfio_add_group_dev(struct device *dev,
48 const struct vfio_device_ops *ops, 51 const struct vfio_device_ops *ops,
49 void *device_data); 52 void *device_data);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index d1172331ca62..7d7a4c6f2090 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -39,6 +39,13 @@
39#define VFIO_SPAPR_TCE_v2_IOMMU 7 39#define VFIO_SPAPR_TCE_v2_IOMMU 7
40 40
41/* 41/*
42 * The No-IOMMU IOMMU offers no translation or isolation for devices and
43 * supports no ioctls outside of VFIO_CHECK_EXTENSION. Use of VFIO's No-IOMMU
44 * code will taint the host kernel and should be used with extreme caution.
45 */
46#define VFIO_NOIOMMU_IOMMU 8
47
48/*
42 * The IOCTL interface is designed for extensibility by embedding the 49 * The IOCTL interface is designed for extensibility by embedding the
43 * structure length (argsz) and flags into structures passed between 50 * structure length (argsz) and flags into structures passed between
44 * kernel and userspace. We therefore use the _IO() macro for these 51 * kernel and userspace. We therefore use the _IO() macro for these