diff options
author | Alexey Kardashevskiy <aik@ozlabs.ru> | 2013-05-20 23:33:10 -0400 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2013-06-20 02:55:14 -0400 |
commit | 5ffd229c02731a91d08ca21e76b503c5bbb5c095 (patch) | |
tree | ed5f684cb6a5b683846d6b3d97fa2c0999835c24 /drivers/vfio | |
parent | 4e13c1ac6baa1d6c2b650d66ca89e1e12727ec19 (diff) |
powerpc/vfio: Implement IOMMU driver for VFIO
VFIO implements platform-independent functionality such as
a PCI driver, BAR access (via read/write on a file descriptor
or direct mapping when possible) and IRQ signaling.
The platform-dependent part includes IOMMU initialization
and handling. This implements an IOMMU driver for VFIO
which handles mapping/unmapping of pages for guest IO and
provides information about the DMA window (required by a POWER
guest).
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'drivers/vfio')
-rw-r--r-- | drivers/vfio/Kconfig | 6 | ||||
-rw-r--r-- | drivers/vfio/Makefile | 1 | ||||
-rw-r--r-- | drivers/vfio/vfio.c | 1 | ||||
-rw-r--r-- | drivers/vfio/vfio_iommu_spapr_tce.c | 377 |
4 files changed, 385 insertions, 0 deletions
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 7cd5dec0abd1..b464687f6e14 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig | |||
@@ -3,10 +3,16 @@ config VFIO_IOMMU_TYPE1 | |||
3 | depends on VFIO | 3 | depends on VFIO |
4 | default n | 4 | default n |
5 | 5 | ||
6 | config VFIO_IOMMU_SPAPR_TCE | ||
7 | tristate | ||
8 | depends on VFIO && SPAPR_TCE_IOMMU | ||
9 | default n | ||
10 | |||
6 | menuconfig VFIO | 11 | menuconfig VFIO |
7 | tristate "VFIO Non-Privileged userspace driver framework" | 12 | tristate "VFIO Non-Privileged userspace driver framework" |
8 | depends on IOMMU_API | 13 | depends on IOMMU_API |
9 | select VFIO_IOMMU_TYPE1 if X86 | 14 | select VFIO_IOMMU_TYPE1 if X86 |
15 | select VFIO_IOMMU_SPAPR_TCE if PPC_POWERNV | ||
10 | help | 16 | help |
11 | VFIO provides a framework for secure userspace device drivers. | 17 | VFIO provides a framework for secure userspace device drivers. |
12 | See Documentation/vfio.txt for more details. | 18 | See Documentation/vfio.txt for more details. |
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 2398d4a0e38b..72bfabc8629e 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile | |||
@@ -1,3 +1,4 @@ | |||
1 | obj-$(CONFIG_VFIO) += vfio.o | 1 | obj-$(CONFIG_VFIO) += vfio.o |
2 | obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o | 2 | obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o |
3 | obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o | ||
3 | obj-$(CONFIG_VFIO_PCI) += pci/ | 4 | obj-$(CONFIG_VFIO_PCI) += pci/ |
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 6d78736563de..259ad282ae5d 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c | |||
@@ -1415,6 +1415,7 @@ static int __init vfio_init(void) | |||
1415 | * drivers. | 1415 | * drivers. |
1416 | */ | 1416 | */ |
1417 | request_module_nowait("vfio_iommu_type1"); | 1417 | request_module_nowait("vfio_iommu_type1"); |
1418 | request_module_nowait("vfio_iommu_spapr_tce"); | ||
1418 | 1419 | ||
1419 | return 0; | 1420 | return 0; |
1420 | 1421 | ||
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c new file mode 100644 index 000000000000..bdae7a04af75 --- /dev/null +++ b/drivers/vfio/vfio_iommu_spapr_tce.c | |||
@@ -0,0 +1,377 @@ | |||
1 | /* | ||
2 | * VFIO: IOMMU DMA mapping support for TCE on POWER | ||
3 | * | ||
4 | * Copyright (C) 2013 IBM Corp. All rights reserved. | ||
5 | * Author: Alexey Kardashevskiy <aik@ozlabs.ru> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * Derived from original vfio_iommu_type1.c: | ||
12 | * Copyright (C) 2012 Red Hat, Inc. All rights reserved. | ||
13 | * Author: Alex Williamson <alex.williamson@redhat.com> | ||
14 | */ | ||
15 | |||
16 | #include <linux/module.h> | ||
17 | #include <linux/pci.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/uaccess.h> | ||
20 | #include <linux/err.h> | ||
21 | #include <linux/vfio.h> | ||
22 | #include <asm/iommu.h> | ||
23 | #include <asm/tce.h> | ||
24 | |||
25 | #define DRIVER_VERSION "0.1" | ||
26 | #define DRIVER_AUTHOR "aik@ozlabs.ru" | ||
27 | #define DRIVER_DESC "VFIO IOMMU SPAPR TCE" | ||
28 | |||
29 | static void tce_iommu_detach_group(void *iommu_data, | ||
30 | struct iommu_group *iommu_group); | ||
31 | |||
32 | /* | ||
33 | * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation | ||
34 | * | ||
35 | * This code handles mapping and unmapping of user data buffers | ||
36 | * into DMA'ble space using the IOMMU | ||
37 | */ | ||
38 | |||
39 | /* | ||
40 | * The container descriptor supports only a single group per container. | ||
41 | * Required by the API as the container is not supplied with the IOMMU group | ||
42 | * at the moment of initialization. | ||
43 | */ | ||
44 | struct tce_container { | ||
45 | struct mutex lock; | ||
46 | struct iommu_table *tbl; | ||
47 | bool enabled; | ||
48 | }; | ||
49 | |||
50 | static int tce_iommu_enable(struct tce_container *container) | ||
51 | { | ||
52 | int ret = 0; | ||
53 | unsigned long locked, lock_limit, npages; | ||
54 | struct iommu_table *tbl = container->tbl; | ||
55 | |||
56 | if (!container->tbl) | ||
57 | return -ENXIO; | ||
58 | |||
59 | if (!current->mm) | ||
60 | return -ESRCH; /* process exited */ | ||
61 | |||
62 | if (container->enabled) | ||
63 | return -EBUSY; | ||
64 | |||
65 | /* | ||
66 | * When userspace pages are mapped into the IOMMU, they are effectively | ||
67 | * locked memory, so, theoretically, we need to update the accounting | ||
68 | * of locked pages on each map and unmap. For powerpc, the map unmap | ||
69 | * paths can be very hot, though, and the accounting would kill | ||
70 | * performance, especially since it would be difficult to impossible | ||
71 | * to handle the accounting in real mode only. | ||
72 | * | ||
73 | * To address that, rather than precisely accounting every page, we | ||
74 | * instead account for a worst case on locked memory when the iommu is | ||
75 | * enabled and disabled. The worst case upper bound on locked memory | ||
76 | * is the size of the whole iommu window, which is usually relatively | ||
77 | * small (compared to total memory sizes) on POWER hardware. | ||
78 | * | ||
79 | * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits, | ||
80 | * that would effectively kill the guest at random points, much better | ||
81 | * enforcing the limit based on the max that the guest can map. | ||
82 | */ | ||
83 | down_write(¤t->mm->mmap_sem); | ||
84 | npages = (tbl->it_size << IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; | ||
85 | locked = current->mm->locked_vm + npages; | ||
86 | lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; | ||
87 | if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { | ||
88 | pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n", | ||
89 | rlimit(RLIMIT_MEMLOCK)); | ||
90 | ret = -ENOMEM; | ||
91 | } else { | ||
92 | |||
93 | current->mm->locked_vm += npages; | ||
94 | container->enabled = true; | ||
95 | } | ||
96 | up_write(¤t->mm->mmap_sem); | ||
97 | |||
98 | return ret; | ||
99 | } | ||
100 | |||
101 | static void tce_iommu_disable(struct tce_container *container) | ||
102 | { | ||
103 | if (!container->enabled) | ||
104 | return; | ||
105 | |||
106 | container->enabled = false; | ||
107 | |||
108 | if (!container->tbl || !current->mm) | ||
109 | return; | ||
110 | |||
111 | down_write(¤t->mm->mmap_sem); | ||
112 | current->mm->locked_vm -= (container->tbl->it_size << | ||
113 | IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; | ||
114 | up_write(¤t->mm->mmap_sem); | ||
115 | } | ||
116 | |||
117 | static void *tce_iommu_open(unsigned long arg) | ||
118 | { | ||
119 | struct tce_container *container; | ||
120 | |||
121 | if (arg != VFIO_SPAPR_TCE_IOMMU) { | ||
122 | pr_err("tce_vfio: Wrong IOMMU type\n"); | ||
123 | return ERR_PTR(-EINVAL); | ||
124 | } | ||
125 | |||
126 | container = kzalloc(sizeof(*container), GFP_KERNEL); | ||
127 | if (!container) | ||
128 | return ERR_PTR(-ENOMEM); | ||
129 | |||
130 | mutex_init(&container->lock); | ||
131 | |||
132 | return container; | ||
133 | } | ||
134 | |||
135 | static void tce_iommu_release(void *iommu_data) | ||
136 | { | ||
137 | struct tce_container *container = iommu_data; | ||
138 | |||
139 | WARN_ON(container->tbl && !container->tbl->it_group); | ||
140 | tce_iommu_disable(container); | ||
141 | |||
142 | if (container->tbl && container->tbl->it_group) | ||
143 | tce_iommu_detach_group(iommu_data, container->tbl->it_group); | ||
144 | |||
145 | mutex_destroy(&container->lock); | ||
146 | |||
147 | kfree(container); | ||
148 | } | ||
149 | |||
150 | static long tce_iommu_ioctl(void *iommu_data, | ||
151 | unsigned int cmd, unsigned long arg) | ||
152 | { | ||
153 | struct tce_container *container = iommu_data; | ||
154 | unsigned long minsz; | ||
155 | long ret; | ||
156 | |||
157 | switch (cmd) { | ||
158 | case VFIO_CHECK_EXTENSION: | ||
159 | return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0; | ||
160 | |||
161 | case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { | ||
162 | struct vfio_iommu_spapr_tce_info info; | ||
163 | struct iommu_table *tbl = container->tbl; | ||
164 | |||
165 | if (WARN_ON(!tbl)) | ||
166 | return -ENXIO; | ||
167 | |||
168 | minsz = offsetofend(struct vfio_iommu_spapr_tce_info, | ||
169 | dma32_window_size); | ||
170 | |||
171 | if (copy_from_user(&info, (void __user *)arg, minsz)) | ||
172 | return -EFAULT; | ||
173 | |||
174 | if (info.argsz < minsz) | ||
175 | return -EINVAL; | ||
176 | |||
177 | info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT; | ||
178 | info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT; | ||
179 | info.flags = 0; | ||
180 | |||
181 | if (copy_to_user((void __user *)arg, &info, minsz)) | ||
182 | return -EFAULT; | ||
183 | |||
184 | return 0; | ||
185 | } | ||
186 | case VFIO_IOMMU_MAP_DMA: { | ||
187 | struct vfio_iommu_type1_dma_map param; | ||
188 | struct iommu_table *tbl = container->tbl; | ||
189 | unsigned long tce, i; | ||
190 | |||
191 | if (!tbl) | ||
192 | return -ENXIO; | ||
193 | |||
194 | BUG_ON(!tbl->it_group); | ||
195 | |||
196 | minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); | ||
197 | |||
198 | if (copy_from_user(¶m, (void __user *)arg, minsz)) | ||
199 | return -EFAULT; | ||
200 | |||
201 | if (param.argsz < minsz) | ||
202 | return -EINVAL; | ||
203 | |||
204 | if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ | | ||
205 | VFIO_DMA_MAP_FLAG_WRITE)) | ||
206 | return -EINVAL; | ||
207 | |||
208 | if ((param.size & ~IOMMU_PAGE_MASK) || | ||
209 | (param.vaddr & ~IOMMU_PAGE_MASK)) | ||
210 | return -EINVAL; | ||
211 | |||
212 | /* iova is checked by the IOMMU API */ | ||
213 | tce = param.vaddr; | ||
214 | if (param.flags & VFIO_DMA_MAP_FLAG_READ) | ||
215 | tce |= TCE_PCI_READ; | ||
216 | if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) | ||
217 | tce |= TCE_PCI_WRITE; | ||
218 | |||
219 | ret = iommu_tce_put_param_check(tbl, param.iova, tce); | ||
220 | if (ret) | ||
221 | return ret; | ||
222 | |||
223 | for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT); ++i) { | ||
224 | ret = iommu_put_tce_user_mode(tbl, | ||
225 | (param.iova >> IOMMU_PAGE_SHIFT) + i, | ||
226 | tce); | ||
227 | if (ret) | ||
228 | break; | ||
229 | tce += IOMMU_PAGE_SIZE; | ||
230 | } | ||
231 | if (ret) | ||
232 | iommu_clear_tces_and_put_pages(tbl, | ||
233 | param.iova >> IOMMU_PAGE_SHIFT, i); | ||
234 | |||
235 | iommu_flush_tce(tbl); | ||
236 | |||
237 | return ret; | ||
238 | } | ||
239 | case VFIO_IOMMU_UNMAP_DMA: { | ||
240 | struct vfio_iommu_type1_dma_unmap param; | ||
241 | struct iommu_table *tbl = container->tbl; | ||
242 | |||
243 | if (WARN_ON(!tbl)) | ||
244 | return -ENXIO; | ||
245 | |||
246 | minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, | ||
247 | size); | ||
248 | |||
249 | if (copy_from_user(¶m, (void __user *)arg, minsz)) | ||
250 | return -EFAULT; | ||
251 | |||
252 | if (param.argsz < minsz) | ||
253 | return -EINVAL; | ||
254 | |||
255 | /* No flag is supported now */ | ||
256 | if (param.flags) | ||
257 | return -EINVAL; | ||
258 | |||
259 | if (param.size & ~IOMMU_PAGE_MASK) | ||
260 | return -EINVAL; | ||
261 | |||
262 | ret = iommu_tce_clear_param_check(tbl, param.iova, 0, | ||
263 | param.size >> IOMMU_PAGE_SHIFT); | ||
264 | if (ret) | ||
265 | return ret; | ||
266 | |||
267 | ret = iommu_clear_tces_and_put_pages(tbl, | ||
268 | param.iova >> IOMMU_PAGE_SHIFT, | ||
269 | param.size >> IOMMU_PAGE_SHIFT); | ||
270 | iommu_flush_tce(tbl); | ||
271 | |||
272 | return ret; | ||
273 | } | ||
274 | case VFIO_IOMMU_ENABLE: | ||
275 | mutex_lock(&container->lock); | ||
276 | ret = tce_iommu_enable(container); | ||
277 | mutex_unlock(&container->lock); | ||
278 | return ret; | ||
279 | |||
280 | |||
281 | case VFIO_IOMMU_DISABLE: | ||
282 | mutex_lock(&container->lock); | ||
283 | tce_iommu_disable(container); | ||
284 | mutex_unlock(&container->lock); | ||
285 | return 0; | ||
286 | } | ||
287 | |||
288 | return -ENOTTY; | ||
289 | } | ||
290 | |||
291 | static int tce_iommu_attach_group(void *iommu_data, | ||
292 | struct iommu_group *iommu_group) | ||
293 | { | ||
294 | int ret; | ||
295 | struct tce_container *container = iommu_data; | ||
296 | struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); | ||
297 | |||
298 | BUG_ON(!tbl); | ||
299 | mutex_lock(&container->lock); | ||
300 | |||
301 | /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", | ||
302 | iommu_group_id(iommu_group), iommu_group); */ | ||
303 | if (container->tbl) { | ||
304 | pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n", | ||
305 | iommu_group_id(container->tbl->it_group), | ||
306 | iommu_group_id(iommu_group)); | ||
307 | ret = -EBUSY; | ||
308 | } else if (container->enabled) { | ||
309 | pr_err("tce_vfio: attaching group #%u to enabled container\n", | ||
310 | iommu_group_id(iommu_group)); | ||
311 | ret = -EBUSY; | ||
312 | } else { | ||
313 | ret = iommu_take_ownership(tbl); | ||
314 | if (!ret) | ||
315 | container->tbl = tbl; | ||
316 | } | ||
317 | |||
318 | mutex_unlock(&container->lock); | ||
319 | |||
320 | return ret; | ||
321 | } | ||
322 | |||
323 | static void tce_iommu_detach_group(void *iommu_data, | ||
324 | struct iommu_group *iommu_group) | ||
325 | { | ||
326 | struct tce_container *container = iommu_data; | ||
327 | struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); | ||
328 | |||
329 | BUG_ON(!tbl); | ||
330 | mutex_lock(&container->lock); | ||
331 | if (tbl != container->tbl) { | ||
332 | pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", | ||
333 | iommu_group_id(iommu_group), | ||
334 | iommu_group_id(tbl->it_group)); | ||
335 | } else { | ||
336 | if (container->enabled) { | ||
337 | pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n", | ||
338 | iommu_group_id(tbl->it_group)); | ||
339 | tce_iommu_disable(container); | ||
340 | } | ||
341 | |||
342 | /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n", | ||
343 | iommu_group_id(iommu_group), iommu_group); */ | ||
344 | container->tbl = NULL; | ||
345 | iommu_release_ownership(tbl); | ||
346 | } | ||
347 | mutex_unlock(&container->lock); | ||
348 | } | ||
349 | |||
350 | const struct vfio_iommu_driver_ops tce_iommu_driver_ops = { | ||
351 | .name = "iommu-vfio-powerpc", | ||
352 | .owner = THIS_MODULE, | ||
353 | .open = tce_iommu_open, | ||
354 | .release = tce_iommu_release, | ||
355 | .ioctl = tce_iommu_ioctl, | ||
356 | .attach_group = tce_iommu_attach_group, | ||
357 | .detach_group = tce_iommu_detach_group, | ||
358 | }; | ||
359 | |||
360 | static int __init tce_iommu_init(void) | ||
361 | { | ||
362 | return vfio_register_iommu_driver(&tce_iommu_driver_ops); | ||
363 | } | ||
364 | |||
365 | static void __exit tce_iommu_cleanup(void) | ||
366 | { | ||
367 | vfio_unregister_iommu_driver(&tce_iommu_driver_ops); | ||
368 | } | ||
369 | |||
370 | module_init(tce_iommu_init); | ||
371 | module_exit(tce_iommu_cleanup); | ||
372 | |||
373 | MODULE_VERSION(DRIVER_VERSION); | ||
374 | MODULE_LICENSE("GPL v2"); | ||
375 | MODULE_AUTHOR(DRIVER_AUTHOR); | ||
376 | MODULE_DESCRIPTION(DRIVER_DESC); | ||
377 | |||