summaryrefslogtreecommitdiffstats
path: root/drivers/vfio
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2013-07-04 13:29:23 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-07-04 13:29:23 -0400
commit: 65b97fb7303050fc826e518cf67fc283da23314f (patch)
tree: 595e7f04d65d95a39d65bd2dcf2385b3b6ea7969 /drivers/vfio
parent: ddcf6600b133697adbafd96e080818bdc0dfd028 (diff)
parent: 1d8b368ab4aacfc3f864655baad4d31a3028ec1a (diff)
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
Pull powerpc updates from Ben Herrenschmidt: "These are the powerpc changes for the 3.11 merge window. In addition to the usual bug fixes and small updates, the main highlights are: - Support for transparent huge pages by Aneesh Kumar for 64-bit server processors. This allows the use of 16M pages as transparent huge pages on kernels compiled with a 64K base page size. - Base VFIO support for KVM on power by Alexey Kardashevskiy - Wiring up of our nvram to the pstore infrastructure, including putting compressed oopses in there by Aruna Balakrishnaiah - Move, rework and improve our "EEH" (basically PCI error handling and recovery) infrastructure. It is no longer specific to pseries but is now usable by the new "powernv" platform as well (no hypervisor) by Gavin Shan. - I fixed some bugs in our math-emu instruction decoding and made it usable to emulate some optional FP instructions on processors with hard FP that lack them (such as fsqrt on Freescale embedded processors). - Support for Power8 "Event Based Branch" facility by Michael Ellerman. This facility allows what is basically "userspace interrupts" for performance monitor events. - A bunch of Transactional Memory vs. Signals bug fixes and HW breakpoint/watchpoint fixes by Michael Neuling. And more ... I apologize in advance if I've failed to highlight something that somebody deemed worth it."
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (156 commits) pstore: Add hsize argument in write_buf call of pstore_ftrace_call powerpc/fsl: add MPIC timer wakeup support powerpc/mpic: create mpic subsystem object powerpc/mpic: add global timer support powerpc/mpic: add irq_set_wake support powerpc/85xx: enable coreint for all the 64bit boards powerpc/8xx: Erroneous double irq_eoi() on CPM IRQ in MPC8xx powerpc/fsl: Enable CONFIG_E1000E in mpc85xx_smp_defconfig powerpc/mpic: Add get_version API both for internal and external use powerpc: Handle both new style and old style reserve maps powerpc/hw_brk: Fix off by one error when validating DAWR region end powerpc/pseries: Support compression of oops text via pstore powerpc/pseries: Re-organise the oops compression code pstore: Pass header size in the pstore write callback powerpc/powernv: Fix iommu initialization again powerpc/pseries: Inform the hypervisor we are using EBB regs powerpc/perf: Add power8 EBB support powerpc/perf: Core EBB support for 64-bit book3s powerpc/perf: Drop MMCRA from thread_struct powerpc/perf: Don't enable if we have zero events ...
Diffstat (limited to 'drivers/vfio')
-rw-r--r-- drivers/vfio/Kconfig 6
-rw-r--r-- drivers/vfio/Makefile 1
-rw-r--r-- drivers/vfio/vfio.c 1
-rw-r--r-- drivers/vfio/vfio_iommu_spapr_tce.c 377
4 files changed, 385 insertions, 0 deletions
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 7cd5dec0abd1..26b3d9d1409f 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -3,10 +3,16 @@ config VFIO_IOMMU_TYPE1
3 depends on VFIO 3 depends on VFIO
4 default n 4 default n
5 5
6config VFIO_IOMMU_SPAPR_TCE
7 tristate
8 depends on VFIO && SPAPR_TCE_IOMMU
9 default n
10
6menuconfig VFIO 11menuconfig VFIO
7 tristate "VFIO Non-Privileged userspace driver framework" 12 tristate "VFIO Non-Privileged userspace driver framework"
8 depends on IOMMU_API 13 depends on IOMMU_API
9 select VFIO_IOMMU_TYPE1 if X86 14 select VFIO_IOMMU_TYPE1 if X86
15 select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES)
10 help 16 help
11 VFIO provides a framework for secure userspace device drivers. 17 VFIO provides a framework for secure userspace device drivers.
12 See Documentation/vfio.txt for more details. 18 See Documentation/vfio.txt for more details.
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 2398d4a0e38b..72bfabc8629e 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,3 +1,4 @@
1obj-$(CONFIG_VFIO) += vfio.o 1obj-$(CONFIG_VFIO) += vfio.o
2obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o 2obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
3obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
3obj-$(CONFIG_VFIO_PCI) += pci/ 4obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 6d78736563de..259ad282ae5d 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1415,6 +1415,7 @@ static int __init vfio_init(void)
1415 * drivers. 1415 * drivers.
1416 */ 1416 */
1417 request_module_nowait("vfio_iommu_type1"); 1417 request_module_nowait("vfio_iommu_type1");
1418 request_module_nowait("vfio_iommu_spapr_tce");
1418 1419
1419 return 0; 1420 return 0;
1420 1421
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
new file mode 100644
index 000000000000..bdae7a04af75
--- /dev/null
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -0,0 +1,377 @@
1/*
2 * VFIO: IOMMU DMA mapping support for TCE on POWER
3 *
4 * Copyright (C) 2013 IBM Corp. All rights reserved.
5 * Author: Alexey Kardashevskiy <aik@ozlabs.ru>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * Derived from original vfio_iommu_type1.c:
12 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
13 * Author: Alex Williamson <alex.williamson@redhat.com>
14 */
15
16#include <linux/module.h>
17#include <linux/pci.h>
18#include <linux/slab.h>
19#include <linux/uaccess.h>
20#include <linux/err.h>
21#include <linux/vfio.h>
22#include <asm/iommu.h>
23#include <asm/tce.h>
24
25#define DRIVER_VERSION "0.1"
26#define DRIVER_AUTHOR "aik@ozlabs.ru"
27#define DRIVER_DESC "VFIO IOMMU SPAPR TCE"
28
29static void tce_iommu_detach_group(void *iommu_data,
30 struct iommu_group *iommu_group);
31
32/*
33 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
34 *
35 * This code handles mapping and unmapping of user data buffers
36 * into DMA'ble space using the IOMMU
37 */
38
39/*
40 * The container descriptor supports only a single group per container.
41 * Required by the API as the container is not supplied with the IOMMU group
42 * at the moment of initialization.
43 */
44struct tce_container {
45 struct mutex lock;
46 struct iommu_table *tbl;
47 bool enabled;
48};
49
50static int tce_iommu_enable(struct tce_container *container)
51{
52 int ret = 0;
53 unsigned long locked, lock_limit, npages;
54 struct iommu_table *tbl = container->tbl;
55
56 if (!container->tbl)
57 return -ENXIO;
58
59 if (!current->mm)
60 return -ESRCH; /* process exited */
61
62 if (container->enabled)
63 return -EBUSY;
64
65 /*
66 * When userspace pages are mapped into the IOMMU, they are effectively
67 * locked memory, so, theoretically, we need to update the accounting
68 * of locked pages on each map and unmap. For powerpc, the map unmap
69 * paths can be very hot, though, and the accounting would kill
70 * performance, especially since it would be difficult to impossible
71 * to handle the accounting in real mode only.
72 *
73 * To address that, rather than precisely accounting every page, we
74 * instead account for a worst case on locked memory when the iommu is
75 * enabled and disabled. The worst case upper bound on locked memory
76 * is the size of the whole iommu window, which is usually relatively
77 * small (compared to total memory sizes) on POWER hardware.
78 *
79 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
80 * that would effectively kill the guest at random points, much better
81 * enforcing the limit based on the max that the guest can map.
82 */
83 down_write(&current->mm->mmap_sem);
84 npages = (tbl->it_size << IOMMU_PAGE_SHIFT) >> PAGE_SHIFT;
85 locked = current->mm->locked_vm + npages;
86 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
87 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
88 pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
89 rlimit(RLIMIT_MEMLOCK));
90 ret = -ENOMEM;
91 } else {
92
93 current->mm->locked_vm += npages;
94 container->enabled = true;
95 }
96 up_write(&current->mm->mmap_sem);
97
98 return ret;
99}
100
101static void tce_iommu_disable(struct tce_container *container)
102{
103 if (!container->enabled)
104 return;
105
106 container->enabled = false;
107
108 if (!container->tbl || !current->mm)
109 return;
110
111 down_write(&current->mm->mmap_sem);
112 current->mm->locked_vm -= (container->tbl->it_size <<
113 IOMMU_PAGE_SHIFT) >> PAGE_SHIFT;
114 up_write(&current->mm->mmap_sem);
115}
116
117static void *tce_iommu_open(unsigned long arg)
118{
119 struct tce_container *container;
120
121 if (arg != VFIO_SPAPR_TCE_IOMMU) {
122 pr_err("tce_vfio: Wrong IOMMU type\n");
123 return ERR_PTR(-EINVAL);
124 }
125
126 container = kzalloc(sizeof(*container), GFP_KERNEL);
127 if (!container)
128 return ERR_PTR(-ENOMEM);
129
130 mutex_init(&container->lock);
131
132 return container;
133}
134
135static void tce_iommu_release(void *iommu_data)
136{
137 struct tce_container *container = iommu_data;
138
139 WARN_ON(container->tbl && !container->tbl->it_group);
140 tce_iommu_disable(container);
141
142 if (container->tbl && container->tbl->it_group)
143 tce_iommu_detach_group(iommu_data, container->tbl->it_group);
144
145 mutex_destroy(&container->lock);
146
147 kfree(container);
148}
149
150static long tce_iommu_ioctl(void *iommu_data,
151 unsigned int cmd, unsigned long arg)
152{
153 struct tce_container *container = iommu_data;
154 unsigned long minsz;
155 long ret;
156
157 switch (cmd) {
158 case VFIO_CHECK_EXTENSION:
159 return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0;
160
161 case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
162 struct vfio_iommu_spapr_tce_info info;
163 struct iommu_table *tbl = container->tbl;
164
165 if (WARN_ON(!tbl))
166 return -ENXIO;
167
168 minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
169 dma32_window_size);
170
171 if (copy_from_user(&info, (void __user *)arg, minsz))
172 return -EFAULT;
173
174 if (info.argsz < minsz)
175 return -EINVAL;
176
177 info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT;
178 info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT;
179 info.flags = 0;
180
181 if (copy_to_user((void __user *)arg, &info, minsz))
182 return -EFAULT;
183
184 return 0;
185 }
186 case VFIO_IOMMU_MAP_DMA: {
187 struct vfio_iommu_type1_dma_map param;
188 struct iommu_table *tbl = container->tbl;
189 unsigned long tce, i;
190
191 if (!tbl)
192 return -ENXIO;
193
194 BUG_ON(!tbl->it_group);
195
196 minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
197
198 if (copy_from_user(&param, (void __user *)arg, minsz))
199 return -EFAULT;
200
201 if (param.argsz < minsz)
202 return -EINVAL;
203
204 if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
205 VFIO_DMA_MAP_FLAG_WRITE))
206 return -EINVAL;
207
208 if ((param.size & ~IOMMU_PAGE_MASK) ||
209 (param.vaddr & ~IOMMU_PAGE_MASK))
210 return -EINVAL;
211
212 /* iova is checked by the IOMMU API */
213 tce = param.vaddr;
214 if (param.flags & VFIO_DMA_MAP_FLAG_READ)
215 tce |= TCE_PCI_READ;
216 if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
217 tce |= TCE_PCI_WRITE;
218
219 ret = iommu_tce_put_param_check(tbl, param.iova, tce);
220 if (ret)
221 return ret;
222
223 for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT); ++i) {
224 ret = iommu_put_tce_user_mode(tbl,
225 (param.iova >> IOMMU_PAGE_SHIFT) + i,
226 tce);
227 if (ret)
228 break;
229 tce += IOMMU_PAGE_SIZE;
230 }
231 if (ret)
232 iommu_clear_tces_and_put_pages(tbl,
233 param.iova >> IOMMU_PAGE_SHIFT, i);
234
235 iommu_flush_tce(tbl);
236
237 return ret;
238 }
239 case VFIO_IOMMU_UNMAP_DMA: {
240 struct vfio_iommu_type1_dma_unmap param;
241 struct iommu_table *tbl = container->tbl;
242
243 if (WARN_ON(!tbl))
244 return -ENXIO;
245
246 minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
247 size);
248
249 if (copy_from_user(&param, (void __user *)arg, minsz))
250 return -EFAULT;
251
252 if (param.argsz < minsz)
253 return -EINVAL;
254
255 /* No flag is supported now */
256 if (param.flags)
257 return -EINVAL;
258
259 if (param.size & ~IOMMU_PAGE_MASK)
260 return -EINVAL;
261
262 ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
263 param.size >> IOMMU_PAGE_SHIFT);
264 if (ret)
265 return ret;
266
267 ret = iommu_clear_tces_and_put_pages(tbl,
268 param.iova >> IOMMU_PAGE_SHIFT,
269 param.size >> IOMMU_PAGE_SHIFT);
270 iommu_flush_tce(tbl);
271
272 return ret;
273 }
274 case VFIO_IOMMU_ENABLE:
275 mutex_lock(&container->lock);
276 ret = tce_iommu_enable(container);
277 mutex_unlock(&container->lock);
278 return ret;
279
280
281 case VFIO_IOMMU_DISABLE:
282 mutex_lock(&container->lock);
283 tce_iommu_disable(container);
284 mutex_unlock(&container->lock);
285 return 0;
286 }
287
288 return -ENOTTY;
289}
290
291static int tce_iommu_attach_group(void *iommu_data,
292 struct iommu_group *iommu_group)
293{
294 int ret;
295 struct tce_container *container = iommu_data;
296 struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);
297
298 BUG_ON(!tbl);
299 mutex_lock(&container->lock);
300
301 /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
302 iommu_group_id(iommu_group), iommu_group); */
303 if (container->tbl) {
304 pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n",
305 iommu_group_id(container->tbl->it_group),
306 iommu_group_id(iommu_group));
307 ret = -EBUSY;
308 } else if (container->enabled) {
309 pr_err("tce_vfio: attaching group #%u to enabled container\n",
310 iommu_group_id(iommu_group));
311 ret = -EBUSY;
312 } else {
313 ret = iommu_take_ownership(tbl);
314 if (!ret)
315 container->tbl = tbl;
316 }
317
318 mutex_unlock(&container->lock);
319
320 return ret;
321}
322
323static void tce_iommu_detach_group(void *iommu_data,
324 struct iommu_group *iommu_group)
325{
326 struct tce_container *container = iommu_data;
327 struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);
328
329 BUG_ON(!tbl);
330 mutex_lock(&container->lock);
331 if (tbl != container->tbl) {
332 pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n",
333 iommu_group_id(iommu_group),
334 iommu_group_id(tbl->it_group));
335 } else {
336 if (container->enabled) {
337 pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
338 iommu_group_id(tbl->it_group));
339 tce_iommu_disable(container);
340 }
341
342 /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
343 iommu_group_id(iommu_group), iommu_group); */
344 container->tbl = NULL;
345 iommu_release_ownership(tbl);
346 }
347 mutex_unlock(&container->lock);
348}
349
350const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
351 .name = "iommu-vfio-powerpc",
352 .owner = THIS_MODULE,
353 .open = tce_iommu_open,
354 .release = tce_iommu_release,
355 .ioctl = tce_iommu_ioctl,
356 .attach_group = tce_iommu_attach_group,
357 .detach_group = tce_iommu_detach_group,
358};
359
360static int __init tce_iommu_init(void)
361{
362 return vfio_register_iommu_driver(&tce_iommu_driver_ops);
363}
364
365static void __exit tce_iommu_cleanup(void)
366{
367 vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
368}
369
370module_init(tce_iommu_init);
371module_exit(tce_iommu_cleanup);
372
373MODULE_VERSION(DRIVER_VERSION);
374MODULE_LICENSE("GPL v2");
375MODULE_AUTHOR(DRIVER_AUTHOR);
376MODULE_DESCRIPTION(DRIVER_DESC);
377