author    Linus Torvalds <torvalds@linux-foundation.org>  2015-06-24 11:46:32 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2015-06-24 11:46:32 -0400
commit    08d183e3c1f650b4db1d07d764502116861542fa (patch)
tree      f868a813f36744597bc7a8260c63cd37a3a94338 /drivers/vfio
parent    4b1f2af6752a4cc9acc1c22ddf3842478965f113 (diff)
parent    6096f884515466f400864ad23d16f20b731a7ce7 (diff)
Merge tag 'powerpc-4.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux
Pull powerpc updates from Michael Ellerman:

 - disable the 32-bit vdso when building LE, so we can build with a
   64-bit only toolchain
 - EEH fixes from Gavin & Richard
 - enable the sys_kcmp syscall from Laurent
 - sysfs control for fastsleep workaround from Shreyas
 - expose OPAL events as an irq chip by Alistair
 - MSI ops moved to pci_controller_ops by Daniel
 - fix for kernel to userspace backtraces for perf from Anton
 - merge pseries and pseries_le defconfigs from Cyril
 - CXL in-kernel API from Mikey
 - OPAL prd driver from Jeremy
 - fix for DSCR handling & tests from Anshuman
 - Powernv flash mtd driver from Cyril
 - dynamic DMA Window support on powernv from Alexey
 - LLVM clang fixes & workarounds from Anton
 - reworked version of the patch to abort syscalls when transactional
 - fix the swap encoding to support 4TB, from Aneesh
 - various fixes as usual
 - Freescale updates from Scott: highlights include more 8xx
   optimizations, an e6500 hugetlb optimization, QMan device tree nodes,
   t1024/t1023 support, and various fixes and cleanup

* tag 'powerpc-4.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux: (180 commits)
  cxl: Fix typo in debug print
  cxl: Add CXL_KERNEL_API config option
  powerpc/powernv: Fix wrong IOMMU table in pnv_ioda_setup_bus_dma()
  powerpc/mm: Change the swap encoding in pte.
  powerpc/mm: PTE_RPN_MAX is not used, remove the same
  powerpc/tm: Abort syscalls in active transactions
  powerpc/iommu/ioda2: Enable compile with IOV=on and IOMMU_API=off
  powerpc/include: Add opal-prd to installed uapi headers
  powerpc/powernv: fix construction of opal PRD messages
  powerpc/powernv: Increase opal-irqchip initcall priority
  powerpc: Make doorbell check preemption safe
  powerpc/powernv: pnv_init_idle_states() should only run on powernv
  macintosh/nvram: Remove as unused
  powerpc: Don't use gcc specific options on clang
  powerpc: Don't use -mno-strict-align on clang
  powerpc: Only use -mtraceback=no, -mno-string and -msoft-float if toolchain supports it
  powerpc: Only use -mabi=altivec if toolchain supports it
  powerpc: Fix duplicate const clang warning in user access code
  vfio: powerpc/spapr: Support Dynamic DMA windows
  vfio: powerpc/spapr: Register memory and define IOMMU v2
  ...
Diffstat (limited to 'drivers/vfio')
-rw-r--r--  drivers/vfio/vfio_iommu_spapr_tce.c | 1101
-rw-r--r--  drivers/vfio/vfio_spapr_eeh.c       |   10
2 files changed, 1010 insertions, 101 deletions
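
A note before the diff: the headline VFIO change in this pull is dynamic DMA window (DDW) support, driven from userspace through the new VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE ioctls handled further down. Below is a minimal userspace sketch, not part of the patch itself: it assumes an already-open container fd bound to the VFIO_SPAPR_TCE_v2_IOMMU type with at least one group attached, and the page_shift/window_size/levels values are illustrative placeholders.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Ask the platform for an additional DMA window; returns its bus address. */
static int create_ddw(int container, __u64 *start_addr)
{
	struct vfio_iommu_spapr_tce_create create;

	memset(&create, 0, sizeof(create));
	create.argsz = sizeof(create);
	create.page_shift = 16;			/* 64K IOMMU pages (placeholder) */
	create.window_size = 1ULL << 30;	/* 1GB window (placeholder) */
	create.levels = 1;			/* single-level TCE table */

	if (ioctl(container, VFIO_IOMMU_SPAPR_TCE_CREATE, &create))
		return -1;

	/* Out parameter: bus address picked by the platform in create_table() */
	*start_addr = create.start_addr;
	return 0;
}

/* Tear the window down again, keyed by the start address returned above. */
static int remove_ddw(int container, __u64 start_addr)
{
	struct vfio_iommu_spapr_tce_remove remove;

	memset(&remove, 0, sizeof(remove));
	remove.argsz = sizeof(remove);
	remove.start_addr = start_addr;

	return ioctl(container, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
}

Note that start_addr is an output field: tce_iommu_create_window() below returns whatever IO base the platform assigned, so userspace must not assume a fixed address.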
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 730b4ef3e0cc..0582b72ef377 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -19,8 +19,10 @@
19#include <linux/uaccess.h> 19#include <linux/uaccess.h>
20#include <linux/err.h> 20#include <linux/err.h>
21#include <linux/vfio.h> 21#include <linux/vfio.h>
22#include <linux/vmalloc.h>
22#include <asm/iommu.h> 23#include <asm/iommu.h>
23#include <asm/tce.h> 24#include <asm/tce.h>
25#include <asm/mmu_context.h>
24 26
25#define DRIVER_VERSION "0.1" 27#define DRIVER_VERSION "0.1"
26#define DRIVER_AUTHOR "aik@ozlabs.ru" 28#define DRIVER_AUTHOR "aik@ozlabs.ru"
@@ -29,6 +31,51 @@
29static void tce_iommu_detach_group(void *iommu_data, 31static void tce_iommu_detach_group(void *iommu_data,
30 struct iommu_group *iommu_group); 32 struct iommu_group *iommu_group);
31 33
34static long try_increment_locked_vm(long npages)
35{
36 long ret = 0, locked, lock_limit;
37
38 if (!current || !current->mm)
39 return -ESRCH; /* process exited */
40
41 if (!npages)
42 return 0;
43
44 down_write(&current->mm->mmap_sem);
45 locked = current->mm->locked_vm + npages;
46 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
47 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
48 ret = -ENOMEM;
49 else
50 current->mm->locked_vm += npages;
51
52 pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
53 npages << PAGE_SHIFT,
54 current->mm->locked_vm << PAGE_SHIFT,
55 rlimit(RLIMIT_MEMLOCK),
56 ret ? " - exceeded" : "");
57
58 up_write(&current->mm->mmap_sem);
59
60 return ret;
61}
62
63static void decrement_locked_vm(long npages)
64{
65 if (!current || !current->mm || !npages)
66 return; /* process exited */
67
68 down_write(&current->mm->mmap_sem);
69 if (WARN_ON_ONCE(npages > current->mm->locked_vm))
70 npages = current->mm->locked_vm;
71 current->mm->locked_vm -= npages;
72 pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
73 npages << PAGE_SHIFT,
74 current->mm->locked_vm << PAGE_SHIFT,
75 rlimit(RLIMIT_MEMLOCK));
76 up_write(&current->mm->mmap_sem);
77}
78
32/* 79/*
33 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation 80 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
34 * 81 *
@@ -36,6 +83,11 @@ static void tce_iommu_detach_group(void *iommu_data,
36 * into DMA'ble space using the IOMMU 83 * into DMA'ble space using the IOMMU
37 */ 84 */
38 85
86struct tce_iommu_group {
87 struct list_head next;
88 struct iommu_group *grp;
89};
90
39/* 91/*
40 * The container descriptor supports only a single group per container. 92 * The container descriptor supports only a single group per container.
41 * Required by the API as the container is not supplied with the IOMMU group 93 * Required by the API as the container is not supplied with the IOMMU group
@@ -43,18 +95,140 @@ static void tce_iommu_detach_group(void *iommu_data,
43 */ 95 */
44struct tce_container { 96struct tce_container {
45 struct mutex lock; 97 struct mutex lock;
46 struct iommu_table *tbl;
47 bool enabled; 98 bool enabled;
99 bool v2;
100 unsigned long locked_pages;
101 struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
102 struct list_head group_list;
48}; 103};
49 104
105static long tce_iommu_unregister_pages(struct tce_container *container,
106 __u64 vaddr, __u64 size)
107{
108 struct mm_iommu_table_group_mem_t *mem;
109
110 if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
111 return -EINVAL;
112
113 mem = mm_iommu_find(vaddr, size >> PAGE_SHIFT);
114 if (!mem)
115 return -ENOENT;
116
117 return mm_iommu_put(mem);
118}
119
120static long tce_iommu_register_pages(struct tce_container *container,
121 __u64 vaddr, __u64 size)
122{
123 long ret = 0;
124 struct mm_iommu_table_group_mem_t *mem = NULL;
125 unsigned long entries = size >> PAGE_SHIFT;
126
127 if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
128 ((vaddr + size) < vaddr))
129 return -EINVAL;
130
131 ret = mm_iommu_get(vaddr, entries, &mem);
132 if (ret)
133 return ret;
134
135 container->enabled = true;
136
137 return 0;
138}
139
140static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl)
141{
142 unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
143 tbl->it_size, PAGE_SIZE);
144 unsigned long *uas;
145 long ret;
146
147 BUG_ON(tbl->it_userspace);
148
149 ret = try_increment_locked_vm(cb >> PAGE_SHIFT);
150 if (ret)
151 return ret;
152
153 uas = vzalloc(cb);
154 if (!uas) {
155 decrement_locked_vm(cb >> PAGE_SHIFT);
156 return -ENOMEM;
157 }
158 tbl->it_userspace = uas;
159
160 return 0;
161}
162
163static void tce_iommu_userspace_view_free(struct iommu_table *tbl)
164{
165 unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
166 tbl->it_size, PAGE_SIZE);
167
168 if (!tbl->it_userspace)
169 return;
170
171 vfree(tbl->it_userspace);
172 tbl->it_userspace = NULL;
173 decrement_locked_vm(cb >> PAGE_SHIFT);
174}
175
176static bool tce_page_is_contained(struct page *page, unsigned page_shift)
177{
178 /*
179 * Check that the TCE table granularity is not bigger than the size of
180 * a page we just found. Otherwise the hardware can get access to
 181 * a bigger memory chunk than it should.
182 */
183 return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
184}
185
186static inline bool tce_groups_attached(struct tce_container *container)
187{
188 return !list_empty(&container->group_list);
189}
190
191static long tce_iommu_find_table(struct tce_container *container,
192 phys_addr_t ioba, struct iommu_table **ptbl)
193{
194 long i;
195
196 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
197 struct iommu_table *tbl = container->tables[i];
198
199 if (tbl) {
200 unsigned long entry = ioba >> tbl->it_page_shift;
201 unsigned long start = tbl->it_offset;
202 unsigned long end = start + tbl->it_size;
203
204 if ((start <= entry) && (entry < end)) {
205 *ptbl = tbl;
206 return i;
207 }
208 }
209 }
210
211 return -1;
212}
213
214static int tce_iommu_find_free_table(struct tce_container *container)
215{
216 int i;
217
218 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
219 if (!container->tables[i])
220 return i;
221 }
222
223 return -ENOSPC;
224}
225
50static int tce_iommu_enable(struct tce_container *container) 226static int tce_iommu_enable(struct tce_container *container)
51{ 227{
52 int ret = 0; 228 int ret = 0;
53 unsigned long locked, lock_limit, npages; 229 unsigned long locked;
54 struct iommu_table *tbl = container->tbl; 230 struct iommu_table_group *table_group;
55 231 struct tce_iommu_group *tcegrp;
56 if (!container->tbl)
57 return -ENXIO;
58 232
59 if (!current->mm) 233 if (!current->mm)
60 return -ESRCH; /* process exited */ 234 return -ESRCH; /* process exited */
@@ -79,21 +253,38 @@ static int tce_iommu_enable(struct tce_container *container)
79 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits, 253 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
80 * that would effectively kill the guest at random points, much better 254 * that would effectively kill the guest at random points, much better
81 * enforcing the limit based on the max that the guest can map. 255 * enforcing the limit based on the max that the guest can map.
256 *
257 * Unfortunately at the moment it counts whole tables, no matter how
258 * much memory the guest has. I.e. for 4GB guest and 4 IOMMU groups
259 * each with 2GB DMA window, 8GB will be counted here. The reason for
260 * this is that we cannot tell here the amount of RAM used by the guest
261 * as this information is only available from KVM and VFIO is
262 * KVM agnostic.
263 *
264 * So we do not allow enabling a container without a group attached
265 * as there is no way to know how much we should increment
266 * the locked_vm counter.
82 */ 267 */
83 down_write(&current->mm->mmap_sem); 268 if (!tce_groups_attached(container))
84 npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT; 269 return -ENODEV;
85 locked = current->mm->locked_vm + npages;
86 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
87 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
88 pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
89 rlimit(RLIMIT_MEMLOCK));
90 ret = -ENOMEM;
91 } else {
92 270
93 current->mm->locked_vm += npages; 271 tcegrp = list_first_entry(&container->group_list,
94 container->enabled = true; 272 struct tce_iommu_group, next);
95 } 273 table_group = iommu_group_get_iommudata(tcegrp->grp);
96 up_write(&current->mm->mmap_sem); 274 if (!table_group)
275 return -ENODEV;
276
277 if (!table_group->tce32_size)
278 return -EPERM;
279
280 locked = table_group->tce32_size >> PAGE_SHIFT;
281 ret = try_increment_locked_vm(locked);
282 if (ret)
283 return ret;
284
285 container->locked_pages = locked;
286
287 container->enabled = true;
97 288
98 return ret; 289 return ret;
99} 290}
@@ -105,20 +296,17 @@ static void tce_iommu_disable(struct tce_container *container)
105 296
106 container->enabled = false; 297 container->enabled = false;
107 298
108 if (!container->tbl || !current->mm) 299 if (!current->mm)
109 return; 300 return;
110 301
111 down_write(&current->mm->mmap_sem); 302 decrement_locked_vm(container->locked_pages);
112 current->mm->locked_vm -= (container->tbl->it_size <<
113 IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
114 up_write(&current->mm->mmap_sem);
115} 303}
116 304
117static void *tce_iommu_open(unsigned long arg) 305static void *tce_iommu_open(unsigned long arg)
118{ 306{
119 struct tce_container *container; 307 struct tce_container *container;
120 308
121 if (arg != VFIO_SPAPR_TCE_IOMMU) { 309 if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
122 pr_err("tce_vfio: Wrong IOMMU type\n"); 310 pr_err("tce_vfio: Wrong IOMMU type\n");
123 return ERR_PTR(-EINVAL); 311 return ERR_PTR(-EINVAL);
124 } 312 }
@@ -128,36 +316,411 @@ static void *tce_iommu_open(unsigned long arg)
128 return ERR_PTR(-ENOMEM); 316 return ERR_PTR(-ENOMEM);
129 317
130 mutex_init(&container->lock); 318 mutex_init(&container->lock);
319 INIT_LIST_HEAD_RCU(&container->group_list);
320
321 container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;
131 322
132 return container; 323 return container;
133} 324}
134 325
326static int tce_iommu_clear(struct tce_container *container,
327 struct iommu_table *tbl,
328 unsigned long entry, unsigned long pages);
329static void tce_iommu_free_table(struct iommu_table *tbl);
330
135static void tce_iommu_release(void *iommu_data) 331static void tce_iommu_release(void *iommu_data)
136{ 332{
137 struct tce_container *container = iommu_data; 333 struct tce_container *container = iommu_data;
334 struct iommu_table_group *table_group;
335 struct tce_iommu_group *tcegrp;
336 long i;
337
338 while (tce_groups_attached(container)) {
339 tcegrp = list_first_entry(&container->group_list,
340 struct tce_iommu_group, next);
341 table_group = iommu_group_get_iommudata(tcegrp->grp);
342 tce_iommu_detach_group(iommu_data, tcegrp->grp);
343 }
138 344
139 WARN_ON(container->tbl && !container->tbl->it_group); 345 /*
140 tce_iommu_disable(container); 346 * If VFIO created a table, it was not disposed
347 * by tce_iommu_detach_group() so do it now.
348 */
349 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
350 struct iommu_table *tbl = container->tables[i];
351
352 if (!tbl)
353 continue;
141 354
142 if (container->tbl && container->tbl->it_group) 355 tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
143 tce_iommu_detach_group(iommu_data, container->tbl->it_group); 356 tce_iommu_free_table(tbl);
357 }
144 358
359 tce_iommu_disable(container);
145 mutex_destroy(&container->lock); 360 mutex_destroy(&container->lock);
146 361
147 kfree(container); 362 kfree(container);
148} 363}
149 364
365static void tce_iommu_unuse_page(struct tce_container *container,
366 unsigned long hpa)
367{
368 struct page *page;
369
370 page = pfn_to_page(hpa >> PAGE_SHIFT);
371 put_page(page);
372}
373
374static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size,
375 unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
376{
377 long ret = 0;
378 struct mm_iommu_table_group_mem_t *mem;
379
380 mem = mm_iommu_lookup(tce, size);
381 if (!mem)
382 return -EINVAL;
383
384 ret = mm_iommu_ua_to_hpa(mem, tce, phpa);
385 if (ret)
386 return -EINVAL;
387
388 *pmem = mem;
389
390 return 0;
391}
392
393static void tce_iommu_unuse_page_v2(struct iommu_table *tbl,
394 unsigned long entry)
395{
396 struct mm_iommu_table_group_mem_t *mem = NULL;
397 int ret;
398 unsigned long hpa = 0;
399 unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
400
401 if (!pua || !current || !current->mm)
402 return;
403
404 ret = tce_iommu_prereg_ua_to_hpa(*pua, IOMMU_PAGE_SIZE(tbl),
405 &hpa, &mem);
406 if (ret)
407 pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n",
408 __func__, *pua, entry, ret);
409 if (mem)
410 mm_iommu_mapped_dec(mem);
411
412 *pua = 0;
413}
414
415static int tce_iommu_clear(struct tce_container *container,
416 struct iommu_table *tbl,
417 unsigned long entry, unsigned long pages)
418{
419 unsigned long oldhpa;
420 long ret;
421 enum dma_data_direction direction;
422
423 for ( ; pages; --pages, ++entry) {
424 direction = DMA_NONE;
425 oldhpa = 0;
426 ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
427 if (ret)
428 continue;
429
430 if (direction == DMA_NONE)
431 continue;
432
433 if (container->v2) {
434 tce_iommu_unuse_page_v2(tbl, entry);
435 continue;
436 }
437
438 tce_iommu_unuse_page(container, oldhpa);
439 }
440
441 return 0;
442}
443
444static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
445{
446 struct page *page = NULL;
447 enum dma_data_direction direction = iommu_tce_direction(tce);
448
449 if (get_user_pages_fast(tce & PAGE_MASK, 1,
450 direction != DMA_TO_DEVICE, &page) != 1)
451 return -EFAULT;
452
453 *hpa = __pa((unsigned long) page_address(page));
454
455 return 0;
456}
457
458static long tce_iommu_build(struct tce_container *container,
459 struct iommu_table *tbl,
460 unsigned long entry, unsigned long tce, unsigned long pages,
461 enum dma_data_direction direction)
462{
463 long i, ret = 0;
464 struct page *page;
465 unsigned long hpa;
466 enum dma_data_direction dirtmp;
467
468 for (i = 0; i < pages; ++i) {
469 unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
470
471 ret = tce_iommu_use_page(tce, &hpa);
472 if (ret)
473 break;
474
475 page = pfn_to_page(hpa >> PAGE_SHIFT);
476 if (!tce_page_is_contained(page, tbl->it_page_shift)) {
477 ret = -EPERM;
478 break;
479 }
480
481 hpa |= offset;
482 dirtmp = direction;
483 ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
484 if (ret) {
485 tce_iommu_unuse_page(container, hpa);
486 pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
487 __func__, entry << tbl->it_page_shift,
488 tce, ret);
489 break;
490 }
491
492 if (dirtmp != DMA_NONE)
493 tce_iommu_unuse_page(container, hpa);
494
495 tce += IOMMU_PAGE_SIZE(tbl);
496 }
497
498 if (ret)
499 tce_iommu_clear(container, tbl, entry, i);
500
501 return ret;
502}
503
504static long tce_iommu_build_v2(struct tce_container *container,
505 struct iommu_table *tbl,
506 unsigned long entry, unsigned long tce, unsigned long pages,
507 enum dma_data_direction direction)
508{
509 long i, ret = 0;
510 struct page *page;
511 unsigned long hpa;
512 enum dma_data_direction dirtmp;
513
514 for (i = 0; i < pages; ++i) {
515 struct mm_iommu_table_group_mem_t *mem = NULL;
516 unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl,
517 entry + i);
518
519 ret = tce_iommu_prereg_ua_to_hpa(tce, IOMMU_PAGE_SIZE(tbl),
520 &hpa, &mem);
521 if (ret)
522 break;
523
524 page = pfn_to_page(hpa >> PAGE_SHIFT);
525 if (!tce_page_is_contained(page, tbl->it_page_shift)) {
526 ret = -EPERM;
527 break;
528 }
529
530 /* Preserve offset within IOMMU page */
531 hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
532 dirtmp = direction;
533
534 /* The registered region is being unregistered */
535 if (mm_iommu_mapped_inc(mem))
536 break;
537
538 ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
539 if (ret) {
540 /* dirtmp cannot be DMA_NONE here */
541 tce_iommu_unuse_page_v2(tbl, entry + i);
542 pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
543 __func__, entry << tbl->it_page_shift,
544 tce, ret);
545 break;
546 }
547
548 if (dirtmp != DMA_NONE)
549 tce_iommu_unuse_page_v2(tbl, entry + i);
550
551 *pua = tce;
552
553 tce += IOMMU_PAGE_SIZE(tbl);
554 }
555
556 if (ret)
557 tce_iommu_clear(container, tbl, entry, i);
558
559 return ret;
560}
561
562static long tce_iommu_create_table(struct tce_container *container,
563 struct iommu_table_group *table_group,
564 int num,
565 __u32 page_shift,
566 __u64 window_size,
567 __u32 levels,
568 struct iommu_table **ptbl)
569{
570 long ret, table_size;
571
572 table_size = table_group->ops->get_table_size(page_shift, window_size,
573 levels);
574 if (!table_size)
575 return -EINVAL;
576
577 ret = try_increment_locked_vm(table_size >> PAGE_SHIFT);
578 if (ret)
579 return ret;
580
581 ret = table_group->ops->create_table(table_group, num,
582 page_shift, window_size, levels, ptbl);
583
584 WARN_ON(!ret && !(*ptbl)->it_ops->free);
585 WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
586
587 if (!ret && container->v2) {
588 ret = tce_iommu_userspace_view_alloc(*ptbl);
589 if (ret)
590 (*ptbl)->it_ops->free(*ptbl);
591 }
592
593 if (ret)
594 decrement_locked_vm(table_size >> PAGE_SHIFT);
595
596 return ret;
597}
598
599static void tce_iommu_free_table(struct iommu_table *tbl)
600{
601 unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
602
603 tce_iommu_userspace_view_free(tbl);
604 tbl->it_ops->free(tbl);
605 decrement_locked_vm(pages);
606}
607
608static long tce_iommu_create_window(struct tce_container *container,
609 __u32 page_shift, __u64 window_size, __u32 levels,
610 __u64 *start_addr)
611{
612 struct tce_iommu_group *tcegrp;
613 struct iommu_table_group *table_group;
614 struct iommu_table *tbl = NULL;
615 long ret, num;
616
617 num = tce_iommu_find_free_table(container);
618 if (num < 0)
619 return num;
620
621 /* Get the first group for ops::create_table */
622 tcegrp = list_first_entry(&container->group_list,
623 struct tce_iommu_group, next);
624 table_group = iommu_group_get_iommudata(tcegrp->grp);
625 if (!table_group)
626 return -EFAULT;
627
628 if (!(table_group->pgsizes & (1ULL << page_shift)))
629 return -EINVAL;
630
631 if (!table_group->ops->set_window || !table_group->ops->unset_window ||
632 !table_group->ops->get_table_size ||
633 !table_group->ops->create_table)
634 return -EPERM;
635
636 /* Create TCE table */
637 ret = tce_iommu_create_table(container, table_group, num,
638 page_shift, window_size, levels, &tbl);
639 if (ret)
640 return ret;
641
642 BUG_ON(!tbl->it_ops->free);
643
644 /*
645 * Program the table to every group.
646 * Groups have been tested for compatibility at the attach time.
647 */
648 list_for_each_entry(tcegrp, &container->group_list, next) {
649 table_group = iommu_group_get_iommudata(tcegrp->grp);
650
651 ret = table_group->ops->set_window(table_group, num, tbl);
652 if (ret)
653 goto unset_exit;
654 }
655
656 container->tables[num] = tbl;
657
658 /* Return start address assigned by platform in create_table() */
659 *start_addr = tbl->it_offset << tbl->it_page_shift;
660
661 return 0;
662
663unset_exit:
664 list_for_each_entry(tcegrp, &container->group_list, next) {
665 table_group = iommu_group_get_iommudata(tcegrp->grp);
666 table_group->ops->unset_window(table_group, num);
667 }
668 tce_iommu_free_table(tbl);
669
670 return ret;
671}
672
673static long tce_iommu_remove_window(struct tce_container *container,
674 __u64 start_addr)
675{
676 struct iommu_table_group *table_group = NULL;
677 struct iommu_table *tbl;
678 struct tce_iommu_group *tcegrp;
679 int num;
680
681 num = tce_iommu_find_table(container, start_addr, &tbl);
682 if (num < 0)
683 return -EINVAL;
684
685 BUG_ON(!tbl->it_size);
686
687 /* Detach groups from IOMMUs */
688 list_for_each_entry(tcegrp, &container->group_list, next) {
689 table_group = iommu_group_get_iommudata(tcegrp->grp);
690
691 /*
692 * SPAPR TCE IOMMU exposes the default DMA window to
693 * the guest via dma32_window_start/size of
694 * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow
 695 * the userspace to remove this window, some do not, so
696 * here we check for the platform capability.
697 */
698 if (!table_group->ops || !table_group->ops->unset_window)
699 return -EPERM;
700
701 table_group->ops->unset_window(table_group, num);
702 }
703
704 /* Free table */
705 tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
706 tce_iommu_free_table(tbl);
707 container->tables[num] = NULL;
708
709 return 0;
710}
711
150static long tce_iommu_ioctl(void *iommu_data, 712static long tce_iommu_ioctl(void *iommu_data,
151 unsigned int cmd, unsigned long arg) 713 unsigned int cmd, unsigned long arg)
152{ 714{
153 struct tce_container *container = iommu_data; 715 struct tce_container *container = iommu_data;
154 unsigned long minsz; 716 unsigned long minsz, ddwsz;
155 long ret; 717 long ret;
156 718
157 switch (cmd) { 719 switch (cmd) {
158 case VFIO_CHECK_EXTENSION: 720 case VFIO_CHECK_EXTENSION:
159 switch (arg) { 721 switch (arg) {
160 case VFIO_SPAPR_TCE_IOMMU: 722 case VFIO_SPAPR_TCE_IOMMU:
723 case VFIO_SPAPR_TCE_v2_IOMMU:
161 ret = 1; 724 ret = 1;
162 break; 725 break;
163 default: 726 default:
@@ -169,9 +732,17 @@ static long tce_iommu_ioctl(void *iommu_data,
169 732
170 case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { 733 case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
171 struct vfio_iommu_spapr_tce_info info; 734 struct vfio_iommu_spapr_tce_info info;
172 struct iommu_table *tbl = container->tbl; 735 struct tce_iommu_group *tcegrp;
736 struct iommu_table_group *table_group;
737
738 if (!tce_groups_attached(container))
739 return -ENXIO;
740
741 tcegrp = list_first_entry(&container->group_list,
742 struct tce_iommu_group, next);
743 table_group = iommu_group_get_iommudata(tcegrp->grp);
173 744
174 if (WARN_ON(!tbl)) 745 if (!table_group)
175 return -ENXIO; 746 return -ENXIO;
176 747
177 minsz = offsetofend(struct vfio_iommu_spapr_tce_info, 748 minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
@@ -183,9 +754,24 @@ static long tce_iommu_ioctl(void *iommu_data,
183 if (info.argsz < minsz) 754 if (info.argsz < minsz)
184 return -EINVAL; 755 return -EINVAL;
185 756
186 info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT_4K; 757 info.dma32_window_start = table_group->tce32_start;
187 info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT_4K; 758 info.dma32_window_size = table_group->tce32_size;
188 info.flags = 0; 759 info.flags = 0;
760 memset(&info.ddw, 0, sizeof(info.ddw));
761
762 if (table_group->max_dynamic_windows_supported &&
763 container->v2) {
764 info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
765 info.ddw.pgsizes = table_group->pgsizes;
766 info.ddw.max_dynamic_windows_supported =
767 table_group->max_dynamic_windows_supported;
768 info.ddw.levels = table_group->max_levels;
769 }
770
771 ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);
772
773 if (info.argsz >= ddwsz)
774 minsz = ddwsz;
189 775
190 if (copy_to_user((void __user *)arg, &info, minsz)) 776 if (copy_to_user((void __user *)arg, &info, minsz))
191 return -EFAULT; 777 return -EFAULT;
@@ -194,13 +780,12 @@ static long tce_iommu_ioctl(void *iommu_data,
194 } 780 }
195 case VFIO_IOMMU_MAP_DMA: { 781 case VFIO_IOMMU_MAP_DMA: {
196 struct vfio_iommu_type1_dma_map param; 782 struct vfio_iommu_type1_dma_map param;
197 struct iommu_table *tbl = container->tbl; 783 struct iommu_table *tbl = NULL;
198 unsigned long tce, i; 784 long num;
785 enum dma_data_direction direction;
199 786
200 if (!tbl) 787 if (!container->enabled)
201 return -ENXIO; 788 return -EPERM;
202
203 BUG_ON(!tbl->it_group);
204 789
205 minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); 790 minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
206 791
@@ -214,32 +799,43 @@ static long tce_iommu_ioctl(void *iommu_data,
214 VFIO_DMA_MAP_FLAG_WRITE)) 799 VFIO_DMA_MAP_FLAG_WRITE))
215 return -EINVAL; 800 return -EINVAL;
216 801
217 if ((param.size & ~IOMMU_PAGE_MASK_4K) || 802 num = tce_iommu_find_table(container, param.iova, &tbl);
218 (param.vaddr & ~IOMMU_PAGE_MASK_4K)) 803 if (num < 0)
804 return -ENXIO;
805
806 if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
807 (param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
219 return -EINVAL; 808 return -EINVAL;
220 809
221 /* iova is checked by the IOMMU API */ 810 /* iova is checked by the IOMMU API */
222 tce = param.vaddr; 811 if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
223 if (param.flags & VFIO_DMA_MAP_FLAG_READ) 812 if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
224 tce |= TCE_PCI_READ; 813 direction = DMA_BIDIRECTIONAL;
225 if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) 814 else
226 tce |= TCE_PCI_WRITE; 815 direction = DMA_TO_DEVICE;
816 } else {
817 if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
818 direction = DMA_FROM_DEVICE;
819 else
820 return -EINVAL;
821 }
227 822
228 ret = iommu_tce_put_param_check(tbl, param.iova, tce); 823 ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
229 if (ret) 824 if (ret)
230 return ret; 825 return ret;
231 826
232 for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT_4K); ++i) { 827 if (container->v2)
233 ret = iommu_put_tce_user_mode(tbl, 828 ret = tce_iommu_build_v2(container, tbl,
234 (param.iova >> IOMMU_PAGE_SHIFT_4K) + i, 829 param.iova >> tbl->it_page_shift,
235 tce); 830 param.vaddr,
236 if (ret) 831 param.size >> tbl->it_page_shift,
237 break; 832 direction);
238 tce += IOMMU_PAGE_SIZE_4K; 833 else
239 } 834 ret = tce_iommu_build(container, tbl,
240 if (ret) 835 param.iova >> tbl->it_page_shift,
241 iommu_clear_tces_and_put_pages(tbl, 836 param.vaddr,
242 param.iova >> IOMMU_PAGE_SHIFT_4K, i); 837 param.size >> tbl->it_page_shift,
838 direction);
243 839
244 iommu_flush_tce(tbl); 840 iommu_flush_tce(tbl);
245 841
@@ -247,10 +843,11 @@ static long tce_iommu_ioctl(void *iommu_data,
247 } 843 }
248 case VFIO_IOMMU_UNMAP_DMA: { 844 case VFIO_IOMMU_UNMAP_DMA: {
249 struct vfio_iommu_type1_dma_unmap param; 845 struct vfio_iommu_type1_dma_unmap param;
250 struct iommu_table *tbl = container->tbl; 846 struct iommu_table *tbl = NULL;
847 long num;
251 848
252 if (WARN_ON(!tbl)) 849 if (!container->enabled)
253 return -ENXIO; 850 return -EPERM;
254 851
255 minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, 852 minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
256 size); 853 size);
@@ -265,22 +862,81 @@ static long tce_iommu_ioctl(void *iommu_data,
265 if (param.flags) 862 if (param.flags)
266 return -EINVAL; 863 return -EINVAL;
267 864
268 if (param.size & ~IOMMU_PAGE_MASK_4K) 865 num = tce_iommu_find_table(container, param.iova, &tbl);
866 if (num < 0)
867 return -ENXIO;
868
869 if (param.size & ~IOMMU_PAGE_MASK(tbl))
269 return -EINVAL; 870 return -EINVAL;
270 871
271 ret = iommu_tce_clear_param_check(tbl, param.iova, 0, 872 ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
272 param.size >> IOMMU_PAGE_SHIFT_4K); 873 param.size >> tbl->it_page_shift);
273 if (ret) 874 if (ret)
274 return ret; 875 return ret;
275 876
276 ret = iommu_clear_tces_and_put_pages(tbl, 877 ret = tce_iommu_clear(container, tbl,
277 param.iova >> IOMMU_PAGE_SHIFT_4K, 878 param.iova >> tbl->it_page_shift,
278 param.size >> IOMMU_PAGE_SHIFT_4K); 879 param.size >> tbl->it_page_shift);
279 iommu_flush_tce(tbl); 880 iommu_flush_tce(tbl);
280 881
281 return ret; 882 return ret;
282 } 883 }
884 case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
885 struct vfio_iommu_spapr_register_memory param;
886
887 if (!container->v2)
888 break;
889
890 minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
891 size);
892
893 if (copy_from_user(&param, (void __user *)arg, minsz))
894 return -EFAULT;
895
896 if (param.argsz < minsz)
897 return -EINVAL;
898
899 /* No flag is supported now */
900 if (param.flags)
901 return -EINVAL;
902
903 mutex_lock(&container->lock);
904 ret = tce_iommu_register_pages(container, param.vaddr,
905 param.size);
906 mutex_unlock(&container->lock);
907
908 return ret;
909 }
910 case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
911 struct vfio_iommu_spapr_register_memory param;
912
913 if (!container->v2)
914 break;
915
916 minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
917 size);
918
919 if (copy_from_user(&param, (void __user *)arg, minsz))
920 return -EFAULT;
921
922 if (param.argsz < minsz)
923 return -EINVAL;
924
925 /* No flag is supported now */
926 if (param.flags)
927 return -EINVAL;
928
929 mutex_lock(&container->lock);
930 ret = tce_iommu_unregister_pages(container, param.vaddr,
931 param.size);
932 mutex_unlock(&container->lock);
933
934 return ret;
935 }
283 case VFIO_IOMMU_ENABLE: 936 case VFIO_IOMMU_ENABLE:
937 if (container->v2)
938 break;
939
284 mutex_lock(&container->lock); 940 mutex_lock(&container->lock);
285 ret = tce_iommu_enable(container); 941 ret = tce_iommu_enable(container);
286 mutex_unlock(&container->lock); 942 mutex_unlock(&container->lock);
@@ -288,48 +944,280 @@ static long tce_iommu_ioctl(void *iommu_data,
288 944
289 945
290 case VFIO_IOMMU_DISABLE: 946 case VFIO_IOMMU_DISABLE:
947 if (container->v2)
948 break;
949
291 mutex_lock(&container->lock); 950 mutex_lock(&container->lock);
292 tce_iommu_disable(container); 951 tce_iommu_disable(container);
293 mutex_unlock(&container->lock); 952 mutex_unlock(&container->lock);
294 return 0; 953 return 0;
295 case VFIO_EEH_PE_OP:
296 if (!container->tbl || !container->tbl->it_group)
297 return -ENODEV;
298 954
299 return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group, 955 case VFIO_EEH_PE_OP: {
300 cmd, arg); 956 struct tce_iommu_group *tcegrp;
957
958 ret = 0;
959 list_for_each_entry(tcegrp, &container->group_list, next) {
960 ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
961 cmd, arg);
962 if (ret)
963 return ret;
964 }
965 return ret;
966 }
967
968 case VFIO_IOMMU_SPAPR_TCE_CREATE: {
969 struct vfio_iommu_spapr_tce_create create;
970
971 if (!container->v2)
972 break;
973
974 if (!tce_groups_attached(container))
975 return -ENXIO;
976
977 minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
978 start_addr);
979
980 if (copy_from_user(&create, (void __user *)arg, minsz))
981 return -EFAULT;
982
983 if (create.argsz < minsz)
984 return -EINVAL;
985
986 if (create.flags)
987 return -EINVAL;
988
989 mutex_lock(&container->lock);
990
991 ret = tce_iommu_create_window(container, create.page_shift,
992 create.window_size, create.levels,
993 &create.start_addr);
994
995 mutex_unlock(&container->lock);
996
997 if (!ret && copy_to_user((void __user *)arg, &create, minsz))
998 ret = -EFAULT;
999
1000 return ret;
1001 }
1002 case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
1003 struct vfio_iommu_spapr_tce_remove remove;
1004
1005 if (!container->v2)
1006 break;
1007
1008 if (!tce_groups_attached(container))
1009 return -ENXIO;
1010
1011 minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
1012 start_addr);
1013
1014 if (copy_from_user(&remove, (void __user *)arg, minsz))
1015 return -EFAULT;
1016
1017 if (remove.argsz < minsz)
1018 return -EINVAL;
1019
1020 if (remove.flags)
1021 return -EINVAL;
1022
1023 mutex_lock(&container->lock);
1024
1025 ret = tce_iommu_remove_window(container, remove.start_addr);
1026
1027 mutex_unlock(&container->lock);
1028
1029 return ret;
1030 }
301 } 1031 }
302 1032
303 return -ENOTTY; 1033 return -ENOTTY;
304} 1034}
305 1035
1036static void tce_iommu_release_ownership(struct tce_container *container,
1037 struct iommu_table_group *table_group)
1038{
1039 int i;
1040
1041 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
1042 struct iommu_table *tbl = container->tables[i];
1043
1044 if (!tbl)
1045 continue;
1046
1047 tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
1048 tce_iommu_userspace_view_free(tbl);
1049 if (tbl->it_map)
1050 iommu_release_ownership(tbl);
1051
1052 container->tables[i] = NULL;
1053 }
1054}
1055
1056static int tce_iommu_take_ownership(struct tce_container *container,
1057 struct iommu_table_group *table_group)
1058{
1059 int i, j, rc = 0;
1060
1061 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
1062 struct iommu_table *tbl = table_group->tables[i];
1063
1064 if (!tbl || !tbl->it_map)
1065 continue;
1066
1067 rc = tce_iommu_userspace_view_alloc(tbl);
1068 if (!rc)
1069 rc = iommu_take_ownership(tbl);
1070
1071 if (rc) {
1072 for (j = 0; j < i; ++j)
1073 iommu_release_ownership(
1074 table_group->tables[j]);
1075
1076 return rc;
1077 }
1078 }
1079
1080 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
1081 container->tables[i] = table_group->tables[i];
1082
1083 return 0;
1084}
1085
1086static void tce_iommu_release_ownership_ddw(struct tce_container *container,
1087 struct iommu_table_group *table_group)
1088{
1089 long i;
1090
1091 if (!table_group->ops->unset_window) {
1092 WARN_ON_ONCE(1);
1093 return;
1094 }
1095
1096 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
1097 table_group->ops->unset_window(table_group, i);
1098
1099 table_group->ops->release_ownership(table_group);
1100}
1101
1102static long tce_iommu_take_ownership_ddw(struct tce_container *container,
1103 struct iommu_table_group *table_group)
1104{
1105 long i, ret = 0;
1106 struct iommu_table *tbl = NULL;
1107
1108 if (!table_group->ops->create_table || !table_group->ops->set_window ||
1109 !table_group->ops->release_ownership) {
1110 WARN_ON_ONCE(1);
1111 return -EFAULT;
1112 }
1113
1114 table_group->ops->take_ownership(table_group);
1115
1116 /*
1117 * If this is the first group attached, check if there is
1118 * a default DMA window and create one if there is none,
1119 * as userspace expects it to exist.
1120 */
1121 if (!tce_groups_attached(container) && !container->tables[0]) {
1122 ret = tce_iommu_create_table(container,
1123 table_group,
1124 0, /* window number */
1125 IOMMU_PAGE_SHIFT_4K,
1126 table_group->tce32_size,
1127 1, /* default levels */
1128 &tbl);
1129 if (ret)
1130 goto release_exit;
1131 else
1132 container->tables[0] = tbl;
1133 }
1134
1135 /* Set all windows to the new group */
1136 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
1137 tbl = container->tables[i];
1138
1139 if (!tbl)
1140 continue;
1141
1142 /* Set the default window to a new group */
1143 ret = table_group->ops->set_window(table_group, i, tbl);
1144 if (ret)
1145 goto release_exit;
1146 }
1147
1148 return 0;
1149
1150release_exit:
1151 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
1152 table_group->ops->unset_window(table_group, i);
1153
1154 table_group->ops->release_ownership(table_group);
1155
1156 return ret;
1157}
1158
306static int tce_iommu_attach_group(void *iommu_data, 1159static int tce_iommu_attach_group(void *iommu_data,
307 struct iommu_group *iommu_group) 1160 struct iommu_group *iommu_group)
308{ 1161{
309 int ret; 1162 int ret;
310 struct tce_container *container = iommu_data; 1163 struct tce_container *container = iommu_data;
311 struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); 1164 struct iommu_table_group *table_group;
1165 struct tce_iommu_group *tcegrp = NULL;
312 1166
313 BUG_ON(!tbl);
314 mutex_lock(&container->lock); 1167 mutex_lock(&container->lock);
315 1168
316 /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", 1169 /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
317 iommu_group_id(iommu_group), iommu_group); */ 1170 iommu_group_id(iommu_group), iommu_group); */
318 if (container->tbl) { 1171 table_group = iommu_group_get_iommudata(iommu_group);
319 pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n", 1172
320 iommu_group_id(container->tbl->it_group), 1173 if (tce_groups_attached(container) && (!table_group->ops ||
321 iommu_group_id(iommu_group)); 1174 !table_group->ops->take_ownership ||
322 ret = -EBUSY; 1175 !table_group->ops->release_ownership)) {
323 } else if (container->enabled) {
324 pr_err("tce_vfio: attaching group #%u to enabled container\n",
325 iommu_group_id(iommu_group));
326 ret = -EBUSY; 1176 ret = -EBUSY;
327 } else { 1177 goto unlock_exit;
328 ret = iommu_take_ownership(tbl); 1178 }
329 if (!ret) 1179
330 container->tbl = tbl; 1180 /* Check if new group has the same iommu_ops (i.e. compatible) */
1181 list_for_each_entry(tcegrp, &container->group_list, next) {
1182 struct iommu_table_group *table_group_tmp;
1183
1184 if (tcegrp->grp == iommu_group) {
1185 pr_warn("tce_vfio: Group %d is already attached\n",
1186 iommu_group_id(iommu_group));
1187 ret = -EBUSY;
1188 goto unlock_exit;
1189 }
1190 table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
1191 if (table_group_tmp->ops != table_group->ops) {
1192 pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
1193 iommu_group_id(iommu_group),
1194 iommu_group_id(tcegrp->grp));
1195 ret = -EPERM;
1196 goto unlock_exit;
1197 }
1198 }
1199
1200 tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
1201 if (!tcegrp) {
1202 ret = -ENOMEM;
1203 goto unlock_exit;
331 } 1204 }
332 1205
1206 if (!table_group->ops || !table_group->ops->take_ownership ||
1207 !table_group->ops->release_ownership)
1208 ret = tce_iommu_take_ownership(container, table_group);
1209 else
1210 ret = tce_iommu_take_ownership_ddw(container, table_group);
1211
1212 if (!ret) {
1213 tcegrp->grp = iommu_group;
1214 list_add(&tcegrp->next, &container->group_list);
1215 }
1216
1217unlock_exit:
1218 if (ret && tcegrp)
1219 kfree(tcegrp);
1220
333 mutex_unlock(&container->lock); 1221 mutex_unlock(&container->lock);
334 1222
335 return ret; 1223 return ret;
@@ -339,26 +1227,37 @@ static void tce_iommu_detach_group(void *iommu_data,
339 struct iommu_group *iommu_group) 1227 struct iommu_group *iommu_group)
340{ 1228{
341 struct tce_container *container = iommu_data; 1229 struct tce_container *container = iommu_data;
342 struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); 1230 struct iommu_table_group *table_group;
1231 bool found = false;
1232 struct tce_iommu_group *tcegrp;
343 1233
344 BUG_ON(!tbl);
345 mutex_lock(&container->lock); 1234 mutex_lock(&container->lock);
346 if (tbl != container->tbl) { 1235
347 pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", 1236 list_for_each_entry(tcegrp, &container->group_list, next) {
348 iommu_group_id(iommu_group), 1237 if (tcegrp->grp == iommu_group) {
349 iommu_group_id(tbl->it_group)); 1238 found = true;
350 } else { 1239 break;
351 if (container->enabled) {
352 pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
353 iommu_group_id(tbl->it_group));
354 tce_iommu_disable(container);
355 } 1240 }
1241 }
356 1242
357 /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n", 1243 if (!found) {
358 iommu_group_id(iommu_group), iommu_group); */ 1244 pr_warn("tce_vfio: detaching unattached group #%u\n",
359 container->tbl = NULL; 1245 iommu_group_id(iommu_group));
360 iommu_release_ownership(tbl); 1246 goto unlock_exit;
361 } 1247 }
1248
1249 list_del(&tcegrp->next);
1250 kfree(tcegrp);
1251
1252 table_group = iommu_group_get_iommudata(iommu_group);
1253 BUG_ON(!table_group);
1254
1255 if (!table_group->ops || !table_group->ops->release_ownership)
1256 tce_iommu_release_ownership(container, table_group);
1257 else
1258 tce_iommu_release_ownership_ddw(container, table_group);
1259
1260unlock_exit:
362 mutex_unlock(&container->lock); 1261 mutex_unlock(&container->lock);
363} 1262}
364 1263
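
That ends the vfio_iommu_spapr_tce.c changes. The v2 flow they implement is easiest to see from userspace; here is a hedged sketch, assuming a VFIO_SPAPR_TCE_v2_IOMMU container fd and a page-aligned buffer (the helper names and values are illustrative, not from the patch). Memory must be pre-registered, which pins it and charges RLIMIT_MEMLOCK once, before VFIO_IOMMU_MAP_DMA can use it, and the READ/WRITE map flags are turned into a DMA direction by tce_iommu_ioctl() above.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Pre-register (pin and account) a buffer; vaddr/size must be page
 * aligned or the kernel side returns -EINVAL. */
static int prereg_memory(int container, void *buf, __u64 size)
{
	struct vfio_iommu_spapr_register_memory reg;

	memset(&reg, 0, sizeof(reg));
	reg.argsz = sizeof(reg);
	reg.vaddr = (__u64)(unsigned long)buf;
	reg.size = size;

	return ioctl(container, VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
}

/* Map a pre-registered range; READ|WRITE becomes DMA_BIDIRECTIONAL,
 * READ alone DMA_TO_DEVICE, WRITE alone DMA_FROM_DEVICE. */
static int map_dma(int container, void *buf, __u64 iova, __u64 size)
{
	struct vfio_iommu_type1_dma_map map;

	memset(&map, 0, sizeof(map));
	map.argsz = sizeof(map);
	map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
	map.vaddr = (__u64)(unsigned long)buf;
	map.iova = iova;
	map.size = size;

	return ioctl(container, VFIO_IOMMU_MAP_DMA, &map);
}

Unregistering uses the same struct with VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY and will fail while TCEs still reference the region, since mm_iommu_put() refuses to release a mapped area.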
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 5fa42db769ee..38edeb4729a9 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
85 case VFIO_EEH_PE_CONFIGURE: 85 case VFIO_EEH_PE_CONFIGURE:
86 ret = eeh_pe_configure(pe); 86 ret = eeh_pe_configure(pe);
87 break; 87 break;
88 case VFIO_EEH_PE_INJECT_ERR:
89 minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
90 if (op.argsz < minsz)
91 return -EINVAL;
92 if (copy_from_user(&op, (void __user *)arg, minsz))
93 return -EFAULT;
94
95 ret = eeh_pe_inject_err(pe, op.err.type, op.err.func,
96 op.err.addr, op.err.mask);
97 break;
88 default: 98 default:
89 ret = -EINVAL; 99 ret = -EINVAL;
90 } 100 }
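
The second hunk adds error injection as a new EEH sub-operation. A hedged userspace sketch follows, assuming a container fd whose group sits in an EEH-capable PE; the err values are placeholders, since valid type/func encodings are platform-specific and not defined by this hunk.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int inject_eeh_err(int container)
{
	struct vfio_eeh_pe_op op;

	memset(&op, 0, sizeof(op));
	/* argsz must reach err.mask to pass the kernel's minsz check */
	op.argsz = sizeof(op);
	op.op = VFIO_EEH_PE_INJECT_ERR;
	op.err.type = 0;	/* placeholder error type */
	op.err.func = 0;	/* placeholder error function */
	op.err.addr = 0;	/* placeholder address */
	op.err.mask = 0;	/* placeholder mask */

	return ioctl(container, VFIO_EEH_PE_OP, &op);
}

On success the kernel forwards the parameters to eeh_pe_inject_err(), so the actual effect depends entirely on the platform's EEH implementation.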