author     Linus Torvalds <torvalds@linux-foundation.org>   2015-06-24 11:46:32 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-06-24 11:46:32 -0400
commit     08d183e3c1f650b4db1d07d764502116861542fa (patch)
tree       f868a813f36744597bc7a8260c63cd37a3a94338 /drivers/vfio
parent     4b1f2af6752a4cc9acc1c22ddf3842478965f113 (diff)
parent     6096f884515466f400864ad23d16f20b731a7ce7 (diff)
Merge tag 'powerpc-4.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux
Pull powerpc updates from Michael Ellerman:
- disable the 32-bit vdso when building LE, so we can build with a
64-bit only toolchain.
- EEH fixes from Gavin & Richard.
- enable the sys_kcmp syscall from Laurent.
- sysfs control for fastsleep workaround from Shreyas.
- expose OPAL events as an irq chip by Alistair.
- MSI ops moved to pci_controller_ops by Daniel.
- fix for kernel to userspace backtraces for perf from Anton.
- merge pseries and pseries_le defconfigs from Cyril.
- CXL in-kernel API from Mikey.
- OPAL prd driver from Jeremy.
- fix for DSCR handling & tests from Anshuman.
- Powernv flash mtd driver from Cyril.
- dynamic DMA Window support on powernv from Alexey.
- LLVM clang fixes & workarounds from Anton.
- reworked version of the patch to abort syscalls when transactional.
- fix the swap encoding to support 4TB, from Aneesh.
- various fixes as usual.
- Freescale updates from Scott: Highlights include more 8xx
optimizations, an e6500 hugetlb optimization, QMan device tree nodes,
t1024/t1023 support, and various fixes and cleanup.
* tag 'powerpc-4.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux: (180 commits)
cxl: Fix typo in debug print
cxl: Add CXL_KERNEL_API config option
powerpc/powernv: Fix wrong IOMMU table in pnv_ioda_setup_bus_dma()
powerpc/mm: Change the swap encoding in pte.
powerpc/mm: PTE_RPN_MAX is not used, remove the same
powerpc/tm: Abort syscalls in active transactions
powerpc/iommu/ioda2: Enable compile with IOV=on and IOMMU_API=off
powerpc/include: Add opal-prd to installed uapi headers
powerpc/powernv: fix construction of opal PRD messages
powerpc/powernv: Increase opal-irqchip initcall priority
powerpc: Make doorbell check preemption safe
powerpc/powernv: pnv_init_idle_states() should only run on powernv
macintosh/nvram: Remove as unused
powerpc: Don't use gcc specific options on clang
powerpc: Don't use -mno-strict-align on clang
powerpc: Only use -mtraceback=no, -mno-string and -msoft-float if toolchain supports it
powerpc: Only use -mabi=altivec if toolchain supports it
powerpc: Fix duplicate const clang warning in user access code
vfio: powerpc/spapr: Support Dynamic DMA windows
vfio: powerpc/spapr: Register memory and define IOMMU v2
...
Diffstat (limited to 'drivers/vfio')
-rw-r--r--   drivers/vfio/vfio_iommu_spapr_tce.c | 1101
-rw-r--r--   drivers/vfio/vfio_spapr_eeh.c       |   10
2 files changed, 1010 insertions, 101 deletions
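
The bulk of the change below adds a v2 SPAPR TCE backend: userspace preregisters its memory once (pinning and RLIMIT_MEMLOCK accounting happen at registration time) and can then create or remove additional DMA windows. The following is a minimal, hypothetical userspace sketch of that flow; it assumes a container fd already set up with VFIO_SPAPR_TCE_v2_IOMMU and an attached group, uses only the ioctls and structure fields visible in this diff, and elides error handling.

/* Hedged sketch, not part of the patch: v2 register-memory + dynamic-window flow. */
#include <linux/vfio.h>
#include <sys/ioctl.h>
#include <string.h>

static int spapr_v2_setup(int container, void *buf, unsigned long size)
{
	struct vfio_iommu_spapr_register_memory reg;
	struct vfio_iommu_spapr_tce_create create;

	/* Preregister the memory that will later be mapped; this is what
	 * tce_iommu_register_pages() in the diff accounts and pins. */
	memset(&reg, 0, sizeof(reg));
	reg.argsz = sizeof(reg);
	reg.vaddr = (__u64)(unsigned long)buf;	/* must be page aligned */
	reg.size = size;			/* must be page aligned */
	if (ioctl(container, VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg))
		return -1;

	/* Create an additional (dynamic) DMA window; the kernel returns its
	 * bus address in create.start_addr (see tce_iommu_create_window()). */
	memset(&create, 0, sizeof(create));
	create.argsz = sizeof(create);
	create.page_shift = 16;			/* 64K IOMMU pages, if the platform supports it */
	create.window_size = 1ULL << 30;	/* 1GB window, illustrative value */
	create.levels = 1;
	if (ioctl(container, VFIO_IOMMU_SPAPR_TCE_CREATE, &create))
		return -1;

	return 0;	/* create.start_addr now holds the new window's IOVA base */
}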
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 730b4ef3e0cc..0582b72ef377 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -19,8 +19,10 @@
 #include <linux/uaccess.h>
 #include <linux/err.h>
 #include <linux/vfio.h>
+#include <linux/vmalloc.h>
 #include <asm/iommu.h>
 #include <asm/tce.h>
+#include <asm/mmu_context.h>
 
 #define DRIVER_VERSION "0.1"
 #define DRIVER_AUTHOR "aik@ozlabs.ru"
@@ -29,6 +31,51 @@
 static void tce_iommu_detach_group(void *iommu_data,
 		struct iommu_group *iommu_group);
 
+static long try_increment_locked_vm(long npages)
+{
+	long ret = 0, locked, lock_limit;
+
+	if (!current || !current->mm)
+		return -ESRCH; /* process exited */
+
+	if (!npages)
+		return 0;
+
+	down_write(&current->mm->mmap_sem);
+	locked = current->mm->locked_vm + npages;
+	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+		ret = -ENOMEM;
+	else
+		current->mm->locked_vm += npages;
+
+	pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
+			npages << PAGE_SHIFT,
+			current->mm->locked_vm << PAGE_SHIFT,
+			rlimit(RLIMIT_MEMLOCK),
+			ret ? " - exceeded" : "");
+
+	up_write(&current->mm->mmap_sem);
+
+	return ret;
+}
+
+static void decrement_locked_vm(long npages)
+{
+	if (!current || !current->mm || !npages)
+		return; /* process exited */
+
+	down_write(&current->mm->mmap_sem);
+	if (WARN_ON_ONCE(npages > current->mm->locked_vm))
+		npages = current->mm->locked_vm;
+	current->mm->locked_vm -= npages;
+	pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
+			npages << PAGE_SHIFT,
+			current->mm->locked_vm << PAGE_SHIFT,
+			rlimit(RLIMIT_MEMLOCK));
+	up_write(&current->mm->mmap_sem);
+}
+
 /*
  * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
  *
@@ -36,6 +83,11 @@ static void tce_iommu_detach_group(void *iommu_data,
  * into DMA'ble space using the IOMMU
  */
 
+struct tce_iommu_group {
+	struct list_head next;
+	struct iommu_group *grp;
+};
+
 /*
  * The container descriptor supports only a single group per container.
  * Required by the API as the container is not supplied with the IOMMU group
@@ -43,18 +95,140 @@ static void tce_iommu_detach_group(void *iommu_data,
  */
 struct tce_container {
 	struct mutex lock;
-	struct iommu_table *tbl;
 	bool enabled;
+	bool v2;
+	unsigned long locked_pages;
+	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
+	struct list_head group_list;
 };
 
+static long tce_iommu_unregister_pages(struct tce_container *container,
+		__u64 vaddr, __u64 size)
+{
+	struct mm_iommu_table_group_mem_t *mem;
+
+	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
+		return -EINVAL;
+
+	mem = mm_iommu_find(vaddr, size >> PAGE_SHIFT);
+	if (!mem)
+		return -ENOENT;
+
+	return mm_iommu_put(mem);
+}
+
+static long tce_iommu_register_pages(struct tce_container *container,
+		__u64 vaddr, __u64 size)
+{
+	long ret = 0;
+	struct mm_iommu_table_group_mem_t *mem = NULL;
+	unsigned long entries = size >> PAGE_SHIFT;
+
+	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
+			((vaddr + size) < vaddr))
+		return -EINVAL;
+
+	ret = mm_iommu_get(vaddr, entries, &mem);
+	if (ret)
+		return ret;
+
+	container->enabled = true;
+
+	return 0;
+}
+
+static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl)
+{
+	unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
+			tbl->it_size, PAGE_SIZE);
+	unsigned long *uas;
+	long ret;
+
+	BUG_ON(tbl->it_userspace);
+
+	ret = try_increment_locked_vm(cb >> PAGE_SHIFT);
+	if (ret)
+		return ret;
+
+	uas = vzalloc(cb);
+	if (!uas) {
+		decrement_locked_vm(cb >> PAGE_SHIFT);
+		return -ENOMEM;
+	}
+	tbl->it_userspace = uas;
+
+	return 0;
+}
+
+static void tce_iommu_userspace_view_free(struct iommu_table *tbl)
+{
+	unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
+			tbl->it_size, PAGE_SIZE);
+
+	if (!tbl->it_userspace)
+		return;
+
+	vfree(tbl->it_userspace);
+	tbl->it_userspace = NULL;
+	decrement_locked_vm(cb >> PAGE_SHIFT);
+}
+
+static bool tce_page_is_contained(struct page *page, unsigned page_shift)
+{
+	/*
+	 * Check that the TCE table granularity is not bigger than the size of
+	 * a page we just found. Otherwise the hardware can get access to
+	 * a bigger memory chunk that it should.
+	 */
+	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
+}
+
+static inline bool tce_groups_attached(struct tce_container *container)
+{
+	return !list_empty(&container->group_list);
+}
+
+static long tce_iommu_find_table(struct tce_container *container,
+		phys_addr_t ioba, struct iommu_table **ptbl)
+{
+	long i;
+
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		struct iommu_table *tbl = container->tables[i];
+
+		if (tbl) {
+			unsigned long entry = ioba >> tbl->it_page_shift;
+			unsigned long start = tbl->it_offset;
+			unsigned long end = start + tbl->it_size;
+
+			if ((start <= entry) && (entry < end)) {
+				*ptbl = tbl;
+				return i;
+			}
+		}
+	}
+
+	return -1;
+}
+
+static int tce_iommu_find_free_table(struct tce_container *container)
+{
+	int i;
+
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		if (!container->tables[i])
+			return i;
+	}
+
+	return -ENOSPC;
+}
+
 static int tce_iommu_enable(struct tce_container *container)
 {
 	int ret = 0;
-	unsigned long locked, lock_limit, npages;
-	struct iommu_table *tbl = container->tbl;
-
-	if (!container->tbl)
-		return -ENXIO;
+	unsigned long locked;
+	struct iommu_table_group *table_group;
+	struct tce_iommu_group *tcegrp;
 
 	if (!current->mm)
 		return -ESRCH; /* process exited */
@@ -79,21 +253,38 @@ static int tce_iommu_enable(struct tce_container *container)
 	 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
 	 * that would effectively kill the guest at random points, much better
 	 * enforcing the limit based on the max that the guest can map.
+	 *
+	 * Unfortunately at the moment it counts whole tables, no matter how
+	 * much memory the guest has. I.e. for 4GB guest and 4 IOMMU groups
+	 * each with 2GB DMA window, 8GB will be counted here. The reason for
+	 * this is that we cannot tell here the amount of RAM used by the guest
+	 * as this information is only available from KVM and VFIO is
+	 * KVM agnostic.
+	 *
+	 * So we do not allow enabling a container without a group attached
+	 * as there is no way to know how much we should increment
+	 * the locked_vm counter.
 	 */
-	down_write(&current->mm->mmap_sem);
-	npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
-	locked = current->mm->locked_vm + npages;
-	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
-		pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
-				rlimit(RLIMIT_MEMLOCK));
-		ret = -ENOMEM;
-	} else {
+	if (!tce_groups_attached(container))
+		return -ENODEV;
 
-		current->mm->locked_vm += npages;
-		container->enabled = true;
-	}
-	up_write(&current->mm->mmap_sem);
+	tcegrp = list_first_entry(&container->group_list,
+			struct tce_iommu_group, next);
+	table_group = iommu_group_get_iommudata(tcegrp->grp);
+	if (!table_group)
+		return -ENODEV;
+
+	if (!table_group->tce32_size)
+		return -EPERM;
+
+	locked = table_group->tce32_size >> PAGE_SHIFT;
+	ret = try_increment_locked_vm(locked);
+	if (ret)
+		return ret;
+
+	container->locked_pages = locked;
+
+	container->enabled = true;
 
 	return ret;
 }
@@ -105,20 +296,17 @@ static void tce_iommu_disable(struct tce_container *container)
 
 	container->enabled = false;
 
-	if (!container->tbl || !current->mm)
+	if (!current->mm)
 		return;
 
-	down_write(&current->mm->mmap_sem);
-	current->mm->locked_vm -= (container->tbl->it_size <<
-			IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
-	up_write(&current->mm->mmap_sem);
+	decrement_locked_vm(container->locked_pages);
 }
 
 static void *tce_iommu_open(unsigned long arg)
 {
 	struct tce_container *container;
 
-	if (arg != VFIO_SPAPR_TCE_IOMMU) {
+	if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
 		pr_err("tce_vfio: Wrong IOMMU type\n");
 		return ERR_PTR(-EINVAL);
 	}
@@ -128,36 +316,411 @@ static void *tce_iommu_open(unsigned long arg)
 		return ERR_PTR(-ENOMEM);
 
 	mutex_init(&container->lock);
+	INIT_LIST_HEAD_RCU(&container->group_list);
+
+	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;
 
 	return container;
 }
 
+static int tce_iommu_clear(struct tce_container *container,
+		struct iommu_table *tbl,
+		unsigned long entry, unsigned long pages);
+static void tce_iommu_free_table(struct iommu_table *tbl);
+
 static void tce_iommu_release(void *iommu_data)
 {
 	struct tce_container *container = iommu_data;
+	struct iommu_table_group *table_group;
+	struct tce_iommu_group *tcegrp;
+	long i;
+
+	while (tce_groups_attached(container)) {
+		tcegrp = list_first_entry(&container->group_list,
+				struct tce_iommu_group, next);
+		table_group = iommu_group_get_iommudata(tcegrp->grp);
+		tce_iommu_detach_group(iommu_data, tcegrp->grp);
+	}
 
-	WARN_ON(container->tbl && !container->tbl->it_group);
-	tce_iommu_disable(container);
+	/*
+	 * If VFIO created a table, it was not disposed
+	 * by tce_iommu_detach_group() so do it now.
+	 */
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		struct iommu_table *tbl = container->tables[i];
+
+		if (!tbl)
+			continue;
 
-	if (container->tbl && container->tbl->it_group)
-		tce_iommu_detach_group(iommu_data, container->tbl->it_group);
+		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
+		tce_iommu_free_table(tbl);
+	}
 
+	tce_iommu_disable(container);
 	mutex_destroy(&container->lock);
 
 	kfree(container);
 }
 
+static void tce_iommu_unuse_page(struct tce_container *container,
+		unsigned long hpa)
+{
+	struct page *page;
+
+	page = pfn_to_page(hpa >> PAGE_SHIFT);
+	put_page(page);
+}
+
+static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size,
+		unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
+{
+	long ret = 0;
+	struct mm_iommu_table_group_mem_t *mem;
+
+	mem = mm_iommu_lookup(tce, size);
+	if (!mem)
+		return -EINVAL;
+
+	ret = mm_iommu_ua_to_hpa(mem, tce, phpa);
+	if (ret)
+		return -EINVAL;
+
+	*pmem = mem;
+
+	return 0;
+}
+
+static void tce_iommu_unuse_page_v2(struct iommu_table *tbl,
+		unsigned long entry)
+{
+	struct mm_iommu_table_group_mem_t *mem = NULL;
+	int ret;
+	unsigned long hpa = 0;
+	unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+
+	if (!pua || !current || !current->mm)
+		return;
+
+	ret = tce_iommu_prereg_ua_to_hpa(*pua, IOMMU_PAGE_SIZE(tbl),
+			&hpa, &mem);
+	if (ret)
+		pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n",
+				__func__, *pua, entry, ret);
+	if (mem)
+		mm_iommu_mapped_dec(mem);
+
+	*pua = 0;
+}
+
+static int tce_iommu_clear(struct tce_container *container,
+		struct iommu_table *tbl,
+		unsigned long entry, unsigned long pages)
+{
+	unsigned long oldhpa;
+	long ret;
+	enum dma_data_direction direction;
+
+	for ( ; pages; --pages, ++entry) {
+		direction = DMA_NONE;
+		oldhpa = 0;
+		ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
+		if (ret)
+			continue;
+
+		if (direction == DMA_NONE)
+			continue;
+
+		if (container->v2) {
+			tce_iommu_unuse_page_v2(tbl, entry);
+			continue;
+		}
+
+		tce_iommu_unuse_page(container, oldhpa);
+	}
+
+	return 0;
+}
+
+static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
+{
+	struct page *page = NULL;
+	enum dma_data_direction direction = iommu_tce_direction(tce);
+
+	if (get_user_pages_fast(tce & PAGE_MASK, 1,
+			direction != DMA_TO_DEVICE, &page) != 1)
+		return -EFAULT;
+
+	*hpa = __pa((unsigned long) page_address(page));
+
+	return 0;
+}
+
+static long tce_iommu_build(struct tce_container *container,
+		struct iommu_table *tbl,
+		unsigned long entry, unsigned long tce, unsigned long pages,
+		enum dma_data_direction direction)
+{
+	long i, ret = 0;
+	struct page *page;
+	unsigned long hpa;
+	enum dma_data_direction dirtmp;
+
+	for (i = 0; i < pages; ++i) {
+		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
+
+		ret = tce_iommu_use_page(tce, &hpa);
+		if (ret)
+			break;
+
+		page = pfn_to_page(hpa >> PAGE_SHIFT);
+		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+			ret = -EPERM;
+			break;
+		}
+
+		hpa |= offset;
+		dirtmp = direction;
+		ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+		if (ret) {
+			tce_iommu_unuse_page(container, hpa);
+			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
+					__func__, entry << tbl->it_page_shift,
+					tce, ret);
+			break;
+		}
+
+		if (dirtmp != DMA_NONE)
+			tce_iommu_unuse_page(container, hpa);
+
+		tce += IOMMU_PAGE_SIZE(tbl);
+	}
+
+	if (ret)
+		tce_iommu_clear(container, tbl, entry, i);
+
+	return ret;
+}
+
+static long tce_iommu_build_v2(struct tce_container *container,
+		struct iommu_table *tbl,
+		unsigned long entry, unsigned long tce, unsigned long pages,
+		enum dma_data_direction direction)
+{
+	long i, ret = 0;
+	struct page *page;
+	unsigned long hpa;
+	enum dma_data_direction dirtmp;
+
+	for (i = 0; i < pages; ++i) {
+		struct mm_iommu_table_group_mem_t *mem = NULL;
+		unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl,
+				entry + i);
+
+		ret = tce_iommu_prereg_ua_to_hpa(tce, IOMMU_PAGE_SIZE(tbl),
+				&hpa, &mem);
+		if (ret)
+			break;
+
+		page = pfn_to_page(hpa >> PAGE_SHIFT);
+		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+			ret = -EPERM;
+			break;
+		}
+
+		/* Preserve offset within IOMMU page */
+		hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
+		dirtmp = direction;
+
+		/* The registered region is being unregistered */
+		if (mm_iommu_mapped_inc(mem))
+			break;
+
+		ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+		if (ret) {
+			/* dirtmp cannot be DMA_NONE here */
+			tce_iommu_unuse_page_v2(tbl, entry + i);
+			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
+					__func__, entry << tbl->it_page_shift,
+					tce, ret);
+			break;
+		}
+
+		if (dirtmp != DMA_NONE)
+			tce_iommu_unuse_page_v2(tbl, entry + i);
+
+		*pua = tce;
+
+		tce += IOMMU_PAGE_SIZE(tbl);
+	}
+
+	if (ret)
+		tce_iommu_clear(container, tbl, entry, i);
+
+	return ret;
+}
+
+static long tce_iommu_create_table(struct tce_container *container,
+		struct iommu_table_group *table_group,
+		int num,
+		__u32 page_shift,
+		__u64 window_size,
+		__u32 levels,
+		struct iommu_table **ptbl)
+{
+	long ret, table_size;
+
+	table_size = table_group->ops->get_table_size(page_shift, window_size,
+			levels);
+	if (!table_size)
+		return -EINVAL;
+
+	ret = try_increment_locked_vm(table_size >> PAGE_SHIFT);
+	if (ret)
+		return ret;
+
+	ret = table_group->ops->create_table(table_group, num,
+			page_shift, window_size, levels, ptbl);
+
+	WARN_ON(!ret && !(*ptbl)->it_ops->free);
+	WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
+
+	if (!ret && container->v2) {
+		ret = tce_iommu_userspace_view_alloc(*ptbl);
+		if (ret)
+			(*ptbl)->it_ops->free(*ptbl);
+	}
+
+	if (ret)
+		decrement_locked_vm(table_size >> PAGE_SHIFT);
+
+	return ret;
+}
+
+static void tce_iommu_free_table(struct iommu_table *tbl)
+{
+	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
+
+	tce_iommu_userspace_view_free(tbl);
+	tbl->it_ops->free(tbl);
+	decrement_locked_vm(pages);
+}
+
+static long tce_iommu_create_window(struct tce_container *container,
+		__u32 page_shift, __u64 window_size, __u32 levels,
+		__u64 *start_addr)
+{
+	struct tce_iommu_group *tcegrp;
+	struct iommu_table_group *table_group;
+	struct iommu_table *tbl = NULL;
+	long ret, num;
+
+	num = tce_iommu_find_free_table(container);
+	if (num < 0)
+		return num;
+
+	/* Get the first group for ops::create_table */
+	tcegrp = list_first_entry(&container->group_list,
+			struct tce_iommu_group, next);
+	table_group = iommu_group_get_iommudata(tcegrp->grp);
+	if (!table_group)
+		return -EFAULT;
+
+	if (!(table_group->pgsizes & (1ULL << page_shift)))
+		return -EINVAL;
+
+	if (!table_group->ops->set_window || !table_group->ops->unset_window ||
+			!table_group->ops->get_table_size ||
+			!table_group->ops->create_table)
+		return -EPERM;
+
+	/* Create TCE table */
+	ret = tce_iommu_create_table(container, table_group, num,
+			page_shift, window_size, levels, &tbl);
+	if (ret)
+		return ret;
+
+	BUG_ON(!tbl->it_ops->free);
+
+	/*
+	 * Program the table to every group.
+	 * Groups have been tested for compatibility at the attach time.
+	 */
+	list_for_each_entry(tcegrp, &container->group_list, next) {
+		table_group = iommu_group_get_iommudata(tcegrp->grp);
+
+		ret = table_group->ops->set_window(table_group, num, tbl);
+		if (ret)
+			goto unset_exit;
+	}
+
+	container->tables[num] = tbl;
+
+	/* Return start address assigned by platform in create_table() */
+	*start_addr = tbl->it_offset << tbl->it_page_shift;
+
+	return 0;
+
+unset_exit:
+	list_for_each_entry(tcegrp, &container->group_list, next) {
+		table_group = iommu_group_get_iommudata(tcegrp->grp);
+		table_group->ops->unset_window(table_group, num);
+	}
+	tce_iommu_free_table(tbl);
+
+	return ret;
+}
+
+static long tce_iommu_remove_window(struct tce_container *container,
+		__u64 start_addr)
+{
+	struct iommu_table_group *table_group = NULL;
+	struct iommu_table *tbl;
+	struct tce_iommu_group *tcegrp;
+	int num;
+
+	num = tce_iommu_find_table(container, start_addr, &tbl);
+	if (num < 0)
+		return -EINVAL;
+
+	BUG_ON(!tbl->it_size);
+
+	/* Detach groups from IOMMUs */
+	list_for_each_entry(tcegrp, &container->group_list, next) {
+		table_group = iommu_group_get_iommudata(tcegrp->grp);
+
+		/*
+		 * SPAPR TCE IOMMU exposes the default DMA window to
+		 * the guest via dma32_window_start/size of
+		 * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow
+		 * the userspace to remove this window, some do not so
+		 * here we check for the platform capability.
+		 */
+		if (!table_group->ops || !table_group->ops->unset_window)
+			return -EPERM;
+
+		table_group->ops->unset_window(table_group, num);
+	}
+
+	/* Free table */
+	tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
+	tce_iommu_free_table(tbl);
+	container->tables[num] = NULL;
+
+	return 0;
+}
+
 static long tce_iommu_ioctl(void *iommu_data,
 		unsigned int cmd, unsigned long arg)
 {
 	struct tce_container *container = iommu_data;
-	unsigned long minsz;
+	unsigned long minsz, ddwsz;
 	long ret;
 
 	switch (cmd) {
 	case VFIO_CHECK_EXTENSION:
 		switch (arg) {
 		case VFIO_SPAPR_TCE_IOMMU:
+		case VFIO_SPAPR_TCE_v2_IOMMU:
 			ret = 1;
 			break;
 		default:
@@ -169,9 +732,17 @@ static long tce_iommu_ioctl(void *iommu_data,
 
 	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
 		struct vfio_iommu_spapr_tce_info info;
-		struct iommu_table *tbl = container->tbl;
+		struct tce_iommu_group *tcegrp;
+		struct iommu_table_group *table_group;
+
+		if (!tce_groups_attached(container))
+			return -ENXIO;
+
+		tcegrp = list_first_entry(&container->group_list,
+				struct tce_iommu_group, next);
+		table_group = iommu_group_get_iommudata(tcegrp->grp);
 
-		if (WARN_ON(!tbl))
+		if (!table_group)
 			return -ENXIO;
 
 		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
@@ -183,9 +754,24 @@ static long tce_iommu_ioctl(void *iommu_data,
 		if (info.argsz < minsz)
 			return -EINVAL;
 
-		info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT_4K;
-		info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT_4K;
+		info.dma32_window_start = table_group->tce32_start;
+		info.dma32_window_size = table_group->tce32_size;
 		info.flags = 0;
+		memset(&info.ddw, 0, sizeof(info.ddw));
+
+		if (table_group->max_dynamic_windows_supported &&
+				container->v2) {
+			info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
+			info.ddw.pgsizes = table_group->pgsizes;
+			info.ddw.max_dynamic_windows_supported =
+				table_group->max_dynamic_windows_supported;
+			info.ddw.levels = table_group->max_levels;
+		}
+
+		ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);
+
+		if (info.argsz >= ddwsz)
+			minsz = ddwsz;
 
 		if (copy_to_user((void __user *)arg, &info, minsz))
 			return -EFAULT;
@@ -194,13 +780,12 @@ static long tce_iommu_ioctl(void *iommu_data,
 	}
 	case VFIO_IOMMU_MAP_DMA: {
 		struct vfio_iommu_type1_dma_map param;
-		struct iommu_table *tbl = container->tbl;
-		unsigned long tce, i;
+		struct iommu_table *tbl = NULL;
+		long num;
+		enum dma_data_direction direction;
 
-		if (!tbl)
-			return -ENXIO;
-
-		BUG_ON(!tbl->it_group);
+		if (!container->enabled)
+			return -EPERM;
 
 		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
 
@@ -214,32 +799,43 @@ static long tce_iommu_ioctl(void *iommu_data,
 				VFIO_DMA_MAP_FLAG_WRITE))
 			return -EINVAL;
 
-		if ((param.size & ~IOMMU_PAGE_MASK_4K) ||
-				(param.vaddr & ~IOMMU_PAGE_MASK_4K))
+		num = tce_iommu_find_table(container, param.iova, &tbl);
+		if (num < 0)
+			return -ENXIO;
+
+		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
+				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
 			return -EINVAL;
 
 		/* iova is checked by the IOMMU API */
-		tce = param.vaddr;
-		if (param.flags & VFIO_DMA_MAP_FLAG_READ)
-			tce |= TCE_PCI_READ;
-		if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
-			tce |= TCE_PCI_WRITE;
+		if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
+			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
+				direction = DMA_BIDIRECTIONAL;
+			else
+				direction = DMA_TO_DEVICE;
+		} else {
+			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
+				direction = DMA_FROM_DEVICE;
+			else
+				return -EINVAL;
+		}
 
-		ret = iommu_tce_put_param_check(tbl, param.iova, tce);
+		ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
 		if (ret)
 			return ret;
 
-		for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT_4K); ++i) {
-			ret = iommu_put_tce_user_mode(tbl,
-					(param.iova >> IOMMU_PAGE_SHIFT_4K) + i,
-					tce);
-			if (ret)
-				break;
-			tce += IOMMU_PAGE_SIZE_4K;
-		}
-		if (ret)
-			iommu_clear_tces_and_put_pages(tbl,
-					param.iova >> IOMMU_PAGE_SHIFT_4K, i);
+		if (container->v2)
+			ret = tce_iommu_build_v2(container, tbl,
+					param.iova >> tbl->it_page_shift,
+					param.vaddr,
+					param.size >> tbl->it_page_shift,
+					direction);
+		else
+			ret = tce_iommu_build(container, tbl,
+					param.iova >> tbl->it_page_shift,
+					param.vaddr,
+					param.size >> tbl->it_page_shift,
+					direction);
 
 		iommu_flush_tce(tbl);
 
@@ -247,10 +843,11 @@ static long tce_iommu_ioctl(void *iommu_data,
 	}
 	case VFIO_IOMMU_UNMAP_DMA: {
 		struct vfio_iommu_type1_dma_unmap param;
-		struct iommu_table *tbl = container->tbl;
+		struct iommu_table *tbl = NULL;
+		long num;
 
-		if (WARN_ON(!tbl))
-			return -ENXIO;
+		if (!container->enabled)
+			return -EPERM;
 
 		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
 				size);
@@ -265,22 +862,81 @@ static long tce_iommu_ioctl(void *iommu_data,
 		if (param.flags)
 			return -EINVAL;
 
-		if (param.size & ~IOMMU_PAGE_MASK_4K)
+		num = tce_iommu_find_table(container, param.iova, &tbl);
+		if (num < 0)
+			return -ENXIO;
+
+		if (param.size & ~IOMMU_PAGE_MASK(tbl))
 			return -EINVAL;
 
 		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
-				param.size >> IOMMU_PAGE_SHIFT_4K);
+				param.size >> tbl->it_page_shift);
 		if (ret)
 			return ret;
 
-		ret = iommu_clear_tces_and_put_pages(tbl,
-				param.iova >> IOMMU_PAGE_SHIFT_4K,
-				param.size >> IOMMU_PAGE_SHIFT_4K);
+		ret = tce_iommu_clear(container, tbl,
+				param.iova >> tbl->it_page_shift,
+				param.size >> tbl->it_page_shift);
 		iommu_flush_tce(tbl);
 
 		return ret;
 	}
+	case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
+		struct vfio_iommu_spapr_register_memory param;
+
+		if (!container->v2)
+			break;
+
+		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
+				size);
+
+		if (copy_from_user(&param, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (param.argsz < minsz)
+			return -EINVAL;
+
+		/* No flag is supported now */
+		if (param.flags)
+			return -EINVAL;
+
+		mutex_lock(&container->lock);
+		ret = tce_iommu_register_pages(container, param.vaddr,
+				param.size);
+		mutex_unlock(&container->lock);
+
+		return ret;
+	}
+	case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
+		struct vfio_iommu_spapr_register_memory param;
+
+		if (!container->v2)
+			break;
+
+		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
+				size);
+
+		if (copy_from_user(&param, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (param.argsz < minsz)
+			return -EINVAL;
+
+		/* No flag is supported now */
+		if (param.flags)
+			return -EINVAL;
+
+		mutex_lock(&container->lock);
+		ret = tce_iommu_unregister_pages(container, param.vaddr,
+				param.size);
+		mutex_unlock(&container->lock);
+
+		return ret;
+	}
 	case VFIO_IOMMU_ENABLE:
+		if (container->v2)
+			break;
+
 		mutex_lock(&container->lock);
 		ret = tce_iommu_enable(container);
 		mutex_unlock(&container->lock);
@@ -288,48 +944,280 @@ static long tce_iommu_ioctl(void *iommu_data,
 
 
 	case VFIO_IOMMU_DISABLE:
+		if (container->v2)
+			break;
+
 		mutex_lock(&container->lock);
 		tce_iommu_disable(container);
 		mutex_unlock(&container->lock);
 		return 0;
-	case VFIO_EEH_PE_OP:
-		if (!container->tbl || !container->tbl->it_group)
-			return -ENODEV;
 
-		return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
-				cmd, arg);
+	case VFIO_EEH_PE_OP: {
+		struct tce_iommu_group *tcegrp;
+
+		ret = 0;
+		list_for_each_entry(tcegrp, &container->group_list, next) {
+			ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
+					cmd, arg);
+			if (ret)
+				return ret;
+		}
+		return ret;
+	}
+
+	case VFIO_IOMMU_SPAPR_TCE_CREATE: {
+		struct vfio_iommu_spapr_tce_create create;
+
+		if (!container->v2)
+			break;
+
+		if (!tce_groups_attached(container))
+			return -ENXIO;
+
+		minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
+				start_addr);
+
+		if (copy_from_user(&create, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (create.argsz < minsz)
+			return -EINVAL;
+
+		if (create.flags)
+			return -EINVAL;
+
+		mutex_lock(&container->lock);
+
+		ret = tce_iommu_create_window(container, create.page_shift,
+				create.window_size, create.levels,
+				&create.start_addr);
+
+		mutex_unlock(&container->lock);
+
+		if (!ret && copy_to_user((void __user *)arg, &create, minsz))
+			ret = -EFAULT;
+
+		return ret;
+	}
+	case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
+		struct vfio_iommu_spapr_tce_remove remove;
+
+		if (!container->v2)
+			break;
+
+		if (!tce_groups_attached(container))
+			return -ENXIO;
+
+		minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
+				start_addr);
+
+		if (copy_from_user(&remove, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (remove.argsz < minsz)
+			return -EINVAL;
+
+		if (remove.flags)
+			return -EINVAL;
+
+		mutex_lock(&container->lock);
+
+		ret = tce_iommu_remove_window(container, remove.start_addr);
+
+		mutex_unlock(&container->lock);
+
+		return ret;
+	}
 	}
 
 	return -ENOTTY;
 }
 
+static void tce_iommu_release_ownership(struct tce_container *container,
+		struct iommu_table_group *table_group)
+{
+	int i;
+
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		struct iommu_table *tbl = container->tables[i];
+
+		if (!tbl)
+			continue;
+
+		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
+		tce_iommu_userspace_view_free(tbl);
+		if (tbl->it_map)
+			iommu_release_ownership(tbl);
+
+		container->tables[i] = NULL;
+	}
+}
+
+static int tce_iommu_take_ownership(struct tce_container *container,
+		struct iommu_table_group *table_group)
+{
+	int i, j, rc = 0;
+
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		struct iommu_table *tbl = table_group->tables[i];
+
+		if (!tbl || !tbl->it_map)
+			continue;
+
+		rc = tce_iommu_userspace_view_alloc(tbl);
+		if (!rc)
+			rc = iommu_take_ownership(tbl);
+
+		if (rc) {
+			for (j = 0; j < i; ++j)
+				iommu_release_ownership(
+						table_group->tables[j]);
+
+			return rc;
+		}
+	}
+
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
+		container->tables[i] = table_group->tables[i];
+
+	return 0;
+}
+
+static void tce_iommu_release_ownership_ddw(struct tce_container *container,
+		struct iommu_table_group *table_group)
+{
+	long i;
+
+	if (!table_group->ops->unset_window) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
+		table_group->ops->unset_window(table_group, i);
+
+	table_group->ops->release_ownership(table_group);
+}
+
+static long tce_iommu_take_ownership_ddw(struct tce_container *container,
+		struct iommu_table_group *table_group)
+{
+	long i, ret = 0;
+	struct iommu_table *tbl = NULL;
+
+	if (!table_group->ops->create_table || !table_group->ops->set_window ||
+			!table_group->ops->release_ownership) {
+		WARN_ON_ONCE(1);
+		return -EFAULT;
+	}
+
+	table_group->ops->take_ownership(table_group);
+
+	/*
+	 * If it the first group attached, check if there is
+	 * a default DMA window and create one if none as
+	 * the userspace expects it to exist.
+	 */
+	if (!tce_groups_attached(container) && !container->tables[0]) {
+		ret = tce_iommu_create_table(container,
+				table_group,
+				0, /* window number */
+				IOMMU_PAGE_SHIFT_4K,
+				table_group->tce32_size,
+				1, /* default levels */
+				&tbl);
+		if (ret)
+			goto release_exit;
+		else
+			container->tables[0] = tbl;
+	}
+
+	/* Set all windows to the new group */
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		tbl = container->tables[i];
+
+		if (!tbl)
+			continue;
+
+		/* Set the default window to a new group */
+		ret = table_group->ops->set_window(table_group, i, tbl);
+		if (ret)
+			goto release_exit;
+	}
+
+	return 0;
+
+release_exit:
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
+		table_group->ops->unset_window(table_group, i);
+
+	table_group->ops->release_ownership(table_group);
+
+	return ret;
+}
+
 static int tce_iommu_attach_group(void *iommu_data,
 		struct iommu_group *iommu_group)
 {
 	int ret;
 	struct tce_container *container = iommu_data;
-	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);
+	struct iommu_table_group *table_group;
+	struct tce_iommu_group *tcegrp = NULL;
 
-	BUG_ON(!tbl);
 	mutex_lock(&container->lock);
 
 	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
 			iommu_group_id(iommu_group), iommu_group); */
-	if (container->tbl) {
-		pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n",
-				iommu_group_id(container->tbl->it_group),
-				iommu_group_id(iommu_group));
-		ret = -EBUSY;
-	} else if (container->enabled) {
-		pr_err("tce_vfio: attaching group #%u to enabled container\n",
-				iommu_group_id(iommu_group));
+	table_group = iommu_group_get_iommudata(iommu_group);
+
+	if (tce_groups_attached(container) && (!table_group->ops ||
+			!table_group->ops->take_ownership ||
+			!table_group->ops->release_ownership)) {
 		ret = -EBUSY;
-	} else {
-		ret = iommu_take_ownership(tbl);
-		if (!ret)
-			container->tbl = tbl;
+		goto unlock_exit;
+	}
+
+	/* Check if new group has the same iommu_ops (i.e. compatible) */
+	list_for_each_entry(tcegrp, &container->group_list, next) {
+		struct iommu_table_group *table_group_tmp;
+
+		if (tcegrp->grp == iommu_group) {
+			pr_warn("tce_vfio: Group %d is already attached\n",
+					iommu_group_id(iommu_group));
+			ret = -EBUSY;
+			goto unlock_exit;
+		}
+		table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
+		if (table_group_tmp->ops != table_group->ops) {
+			pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
+					iommu_group_id(iommu_group),
+					iommu_group_id(tcegrp->grp));
+			ret = -EPERM;
+			goto unlock_exit;
+		}
+	}
+
+	tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
+	if (!tcegrp) {
+		ret = -ENOMEM;
+		goto unlock_exit;
 	}
 
+	if (!table_group->ops || !table_group->ops->take_ownership ||
+			!table_group->ops->release_ownership)
+		ret = tce_iommu_take_ownership(container, table_group);
+	else
+		ret = tce_iommu_take_ownership_ddw(container, table_group);
+
+	if (!ret) {
+		tcegrp->grp = iommu_group;
+		list_add(&tcegrp->next, &container->group_list);
+	}
+
+unlock_exit:
+	if (ret && tcegrp)
+		kfree(tcegrp);
+
 	mutex_unlock(&container->lock);
 
 	return ret;
@@ -339,26 +1227,37 @@ static void tce_iommu_detach_group(void *iommu_data,
 		struct iommu_group *iommu_group)
 {
 	struct tce_container *container = iommu_data;
-	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);
+	struct iommu_table_group *table_group;
+	bool found = false;
+	struct tce_iommu_group *tcegrp;
 
-	BUG_ON(!tbl);
 	mutex_lock(&container->lock);
-	if (tbl != container->tbl) {
-		pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n",
-				iommu_group_id(iommu_group),
-				iommu_group_id(tbl->it_group));
-	} else {
-		if (container->enabled) {
-			pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
-					iommu_group_id(tbl->it_group));
-			tce_iommu_disable(container);
+
+	list_for_each_entry(tcegrp, &container->group_list, next) {
+		if (tcegrp->grp == iommu_group) {
+			found = true;
+			break;
 		}
+	}
 
-		/* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
-				iommu_group_id(iommu_group), iommu_group); */
-		container->tbl = NULL;
-		iommu_release_ownership(tbl);
-	}
+	if (!found) {
+		pr_warn("tce_vfio: detaching unattached group #%u\n",
+				iommu_group_id(iommu_group));
+		goto unlock_exit;
+	}
+
+	list_del(&tcegrp->next);
+	kfree(tcegrp);
+
+	table_group = iommu_group_get_iommudata(iommu_group);
+	BUG_ON(!table_group);
+
+	if (!table_group->ops || !table_group->ops->release_ownership)
+		tce_iommu_release_ownership(container, table_group);
+	else
+		tce_iommu_release_ownership_ddw(container, table_group);
+
+unlock_exit:
 	mutex_unlock(&container->lock);
 }
 
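
Note on the MAP_DMA/UNMAP_DMA hunks above: the handler now locates the target TCE table from the ioba instead of assuming a single 32-bit window, and converts the READ/WRITE flags into a DMA direction. A hypothetical userspace sketch of mapping one buffer through this path (same caveats as the sketch above: container/group setup and error handling omitted, only fields visible in this diff are used):

/* Hedged sketch, not part of the patch: mapping a (preregistered, for v2) buffer. */
#include <linux/vfio.h>
#include <sys/ioctl.h>
#include <string.h>

static int spapr_map(int container, __u64 iova, void *buf, unsigned long size)
{
	struct vfio_iommu_type1_dma_map map;

	memset(&map, 0, sizeof(map));
	map.argsz = sizeof(map);
	map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
	map.iova = iova;			/* must fall inside an existing window */
	map.vaddr = (__u64)(unsigned long)buf;	/* v2: must be preregistered memory */
	map.size = size;			/* multiple of the window's IOMMU page size */

	/* Both flags set translate to DMA_BIDIRECTIONAL in tce_iommu_ioctl() above. */
	return ioctl(container, VFIO_IOMMU_MAP_DMA, &map);
}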
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 5fa42db769ee..38edeb4729a9 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 	case VFIO_EEH_PE_CONFIGURE:
 		ret = eeh_pe_configure(pe);
 		break;
+	case VFIO_EEH_PE_INJECT_ERR:
+		minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
+		if (op.argsz < minsz)
+			return -EINVAL;
+		if (copy_from_user(&op, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		ret = eeh_pe_inject_err(pe, op.err.type, op.err.func,
+				op.err.addr, op.err.mask);
+		break;
 	default:
 		ret = -EINVAL;
 	}
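
The vfio_spapr_eeh.c hunk wires VFIO_EEH_PE_INJECT_ERR through to eeh_pe_inject_err(). A hypothetical sketch of driving it from userspace, assuming the vfio_eeh_pe_op layout this handler reads (argsz and op, plus the err.type/func/addr/mask members); the values passed are illustrative only:

/* Hedged sketch, not part of the patch: EEH error injection on a container's PE. */
#include <linux/vfio.h>
#include <sys/ioctl.h>
#include <string.h>

static int eeh_inject(int container, __u32 type, __u32 func, __u64 addr, __u64 mask)
{
	struct vfio_eeh_pe_op op;

	memset(&op, 0, sizeof(op));
	op.argsz = sizeof(op);
	op.op = VFIO_EEH_PE_INJECT_ERR;
	op.err.type = type;	/* error class, e.g. a 32/64-bit MMIO or DMA error */
	op.err.func = func;
	op.err.addr = addr;
	op.err.mask = mask;

	/* The kernel copies argsz..err.mask and calls eeh_pe_inject_err() as above. */
	return ioctl(container, VFIO_EEH_PE_OP, &op);
}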