author     Linus Torvalds <torvalds@linux-foundation.org>  2017-05-04 14:53:24 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-05-04 14:53:24 -0400
commit     c336bf8e658122eeab63afe0bfcb6360a381a79c (patch)
tree       fb394cb3050fcd64b72508847dca181c7eafc943
parent     a96480723c287c502b02659f4b347aecaa651ea1 (diff)
parent     7cb671e7a34d73b29df77d0b53492277744e57e7 (diff)
Merge tag 'vfio-v4.12-rc1' of git://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:

 - Updates for SPAPR IOMMU backend including compatibility test and
   memory allocation check (Alexey Kardashevskiy)

 - Updates for type1 IOMMU backend to remove asynchronous locked page
   accounting and remove redundancy (Alex Williamson)

* tag 'vfio-v4.12-rc1' of git://github.com/awilliam/linux-vfio:
  vfio/type1: Reduce repetitive calls in vfio_pin_pages_remote()
  vfio/type1: Prune vfio_pin_page_external()
  vfio/type1: Remove locked page accounting workqueue
  vfio/spapr_tce: Check kzalloc() return when preregistering memory
  vfio/powerpc/spapr_tce: Enforce IOMMU type compatibility check
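For context on the type1 series below: the old code deferred mm->locked_vm
updates to a workqueue whenever mmap_sem could not be taken immediately, so
an accounting failure could never be reported back to the caller. The rework
takes the semaphore synchronously with a killable sleep and returns an error
instead. A minimal sketch of that pattern (illustrative names only, not the
patch's own code; the real function is vfio_lock_acct() in the diff below):

#include <linux/mm_types.h>
#include <linux/rwsem.h>

static int sketch_lock_acct(struct mm_struct *mm, long npage,
			    bool lock_cap, unsigned long limit)
{
	int ret;

	/* Sleep for the write lock, but let a fatal signal abort the wait. */
	ret = down_write_killable(&mm->mmap_sem);
	if (ret)
		return ret;	/* -EINTR propagates to the caller */

	/* Enforce RLIMIT_MEMLOCK only on increments without CAP_IPC_LOCK. */
	if (npage > 0 && !lock_cap && mm->locked_vm + npage > limit)
		ret = -ENOMEM;
	else
		mm->locked_vm += npage;

	up_write(&mm->mmap_sem);
	return ret;
}

Because accounting can now fail, vfio_pin_pages_remote() also gains an
unpin_out error path that releases every page pinned so far, rather than
silently returning a short count.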
-rw-r--r--  drivers/vfio/vfio_iommu_spapr_tce.c |  13
-rw-r--r--  drivers/vfio/vfio_iommu_type1.c     | 150
2 files changed, 77 insertions(+), 86 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index cf3de91fbfe7..78dca1aa6410 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -198,6 +198,11 @@ static long tce_iommu_register_pages(struct tce_container *container,
 		return ret;
 
 	tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
+	if (!tcemem) {
+		mm_iommu_put(container->mm, mem);
+		return -ENOMEM;
+	}
+
 	tcemem->mem = mem;
 	list_add(&tcemem->next, &container->prereg_list);
 
@@ -1335,8 +1340,16 @@ static int tce_iommu_attach_group(void *iommu_data,
 
 	if (!table_group->ops || !table_group->ops->take_ownership ||
 			!table_group->ops->release_ownership) {
+		if (container->v2) {
+			ret = -EPERM;
+			goto unlock_exit;
+		}
 		ret = tce_iommu_take_ownership(container, table_group);
 	} else {
+		if (!container->v2) {
+			ret = -EPERM;
+			goto unlock_exit;
+		}
 		ret = tce_iommu_take_ownership_ddw(container, table_group);
 		if (!tce_groups_attached(container) && !container->tables[0])
 			container->def_window_pending = true;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 32d2633092a3..8549cb111627 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -246,69 +246,46 @@ static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn)
 	return ret;
 }
 
-struct vwork {
-	struct mm_struct *mm;
-	long npage;
-	struct work_struct work;
-};
-
-/* delayed decrement/increment for locked_vm */
-static void vfio_lock_acct_bg(struct work_struct *work)
+static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap)
 {
-	struct vwork *vwork = container_of(work, struct vwork, work);
-	struct mm_struct *mm;
-
-	mm = vwork->mm;
-	down_write(&mm->mmap_sem);
-	mm->locked_vm += vwork->npage;
-	up_write(&mm->mmap_sem);
-	mmput(mm);
-	kfree(vwork);
-}
-
-static void vfio_lock_acct(struct task_struct *task, long npage)
-{
-	struct vwork *vwork;
 	struct mm_struct *mm;
 	bool is_current;
+	int ret;
 
 	if (!npage)
-		return;
+		return 0;
 
 	is_current = (task->mm == current->mm);
 
 	mm = is_current ? task->mm : get_task_mm(task);
 	if (!mm)
-		return; /* process exited */
+		return -ESRCH; /* process exited */
 
-	if (down_write_trylock(&mm->mmap_sem)) {
-		mm->locked_vm += npage;
-		up_write(&mm->mmap_sem);
-		if (!is_current)
-			mmput(mm);
-		return;
-	}
+	ret = down_write_killable(&mm->mmap_sem);
+	if (!ret) {
+		if (npage > 0) {
+			if (lock_cap ? !*lock_cap :
+			    !has_capability(task, CAP_IPC_LOCK)) {
+				unsigned long limit;
+
+				limit = task_rlimit(task,
+						    RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+				if (mm->locked_vm + npage > limit)
+					ret = -ENOMEM;
+			}
+		}
+
+		if (!ret)
+			mm->locked_vm += npage;
 
-	if (is_current) {
-		mm = get_task_mm(task);
-		if (!mm)
-			return;
+		up_write(&mm->mmap_sem);
 	}
 
-	/*
-	 * Couldn't get mmap_sem lock, so must setup to update
-	 * mm->locked_vm later. If locked_vm were atomic, we
-	 * wouldn't need this silliness
-	 */
-	vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
-	if (WARN_ON(!vwork)) {
+	if (!is_current)
 		mmput(mm);
-		return;
-	}
-	INIT_WORK(&vwork->work, vfio_lock_acct_bg);
-	vwork->mm = mm;
-	vwork->npage = npage;
-	schedule_work(&vwork->work);
+
+	return ret;
 }
 
 /*
@@ -403,10 +380,10 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 	 * first page and all consecutive pages with the same locking.
 	 */
 static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
-				  long npage, unsigned long *pfn_base)
+				  long npage, unsigned long *pfn_base,
+				  bool lock_cap, unsigned long limit)
 {
-	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	bool lock_cap = capable(CAP_IPC_LOCK);
+	unsigned long pfn = 0;
 	long ret, pinned = 0, lock_acct = 0;
 	bool rsvd;
 	dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
@@ -442,8 +419,6 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 	/* Lock all the consecutive pages from pfn_base */
 	for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage;
 	     pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) {
-		unsigned long pfn = 0;
-
 		ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn);
 		if (ret)
 			break;
@@ -460,14 +435,25 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 				put_pfn(pfn, dma->prot);
 				pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
 					__func__, limit << PAGE_SHIFT);
-				break;
+				ret = -ENOMEM;
+				goto unpin_out;
 			}
 			lock_acct++;
 		}
 	}
 
 out:
-	vfio_lock_acct(current, lock_acct);
+	ret = vfio_lock_acct(current, lock_acct, &lock_cap);
+
+unpin_out:
+	if (ret) {
+		if (!rsvd) {
+			for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
+				put_pfn(pfn, dma->prot);
+		}
+
+		return ret;
+	}
 
 	return pinned;
 }
@@ -488,7 +474,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
 	}
 
 	if (do_accounting)
-		vfio_lock_acct(dma->task, locked - unlocked);
+		vfio_lock_acct(dma->task, locked - unlocked, NULL);
 
 	return unlocked;
 }
@@ -496,37 +482,26 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
 static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
 				  unsigned long *pfn_base, bool do_accounting)
 {
-	unsigned long limit;
-	bool lock_cap = has_capability(dma->task, CAP_IPC_LOCK);
 	struct mm_struct *mm;
 	int ret;
-	bool rsvd;
 
 	mm = get_task_mm(dma->task);
 	if (!mm)
 		return -ENODEV;
 
 	ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base);
-	if (ret)
-		goto pin_page_exit;
-
-	rsvd = is_invalid_reserved_pfn(*pfn_base);
-	limit = task_rlimit(dma->task, RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-	if (!rsvd && !lock_cap && mm->locked_vm + 1 > limit) {
-		put_pfn(*pfn_base, dma->prot);
-		pr_warn("%s: Task %s (%d) RLIMIT_MEMLOCK (%ld) exceeded\n",
-			__func__, dma->task->comm, task_pid_nr(dma->task),
-			limit << PAGE_SHIFT);
-		ret = -ENOMEM;
-		goto pin_page_exit;
+	if (!ret && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
+		ret = vfio_lock_acct(dma->task, 1, NULL);
+		if (ret) {
+			put_pfn(*pfn_base, dma->prot);
+			if (ret == -ENOMEM)
+				pr_warn("%s: Task %s (%d) RLIMIT_MEMLOCK "
+					"(%ld) exceeded\n", __func__,
+					dma->task->comm, task_pid_nr(dma->task),
+					task_rlimit(dma->task, RLIMIT_MEMLOCK));
+		}
 	}
 
-	if (!rsvd && do_accounting)
-		vfio_lock_acct(dma->task, 1);
-	ret = 1;
-
-pin_page_exit:
 	mmput(mm);
 	return ret;
 }
@@ -543,7 +518,7 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
 	unlocked = vfio_iova_put_vfio_pfn(dma, vpfn);
 
 	if (do_accounting)
-		vfio_lock_acct(dma->task, -unlocked);
+		vfio_lock_acct(dma->task, -unlocked, NULL);
 
 	return unlocked;
 }
@@ -606,10 +581,8 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
 		remote_vaddr = dma->vaddr + iova - dma->iova;
 		ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i],
 					     do_accounting);
-		if (ret <= 0) {
-			WARN_ON(!ret);
+		if (ret)
 			goto pin_unwind;
-		}
 
 		ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]);
 		if (ret) {
@@ -740,7 +713,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 
 	dma->iommu_mapped = false;
 	if (do_accounting) {
-		vfio_lock_acct(dma->task, -unlocked);
+		vfio_lock_acct(dma->task, -unlocked, NULL);
 		return 0;
 	}
 	return unlocked;
@@ -951,13 +924,15 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
 	unsigned long vaddr = dma->vaddr;
 	size_t size = map_size;
 	long npage;
-	unsigned long pfn;
+	unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	bool lock_cap = capable(CAP_IPC_LOCK);
 	int ret = 0;
 
 	while (size) {
 		/* Pin a contiguous chunk of memory */
 		npage = vfio_pin_pages_remote(dma, vaddr + dma->size,
-					      size >> PAGE_SHIFT, &pfn);
+					      size >> PAGE_SHIFT, &pfn,
+					      lock_cap, limit);
 		if (npage <= 0) {
 			WARN_ON(!npage);
 			ret = (int)npage;
@@ -1067,6 +1042,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 {
 	struct vfio_domain *d;
 	struct rb_node *n;
+	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	bool lock_cap = capable(CAP_IPC_LOCK);
 	int ret;
 
 	/* Arbitrarily pick the first domain in the list for lookups */
@@ -1113,7 +1090,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 
 			npage = vfio_pin_pages_remote(dma, vaddr,
 						      n >> PAGE_SHIFT,
-						      &pfn);
+						      &pfn, lock_cap,
+						      limit);
 			if (npage <= 0) {
 				WARN_ON(!npage);
 				ret = (int)npage;
@@ -1382,7 +1360,7 @@ static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
 			if (!is_invalid_reserved_pfn(vpfn->pfn))
 				locked++;
 		}
-		vfio_lock_acct(dma->task, locked - unlocked);
+		vfio_lock_acct(dma->task, locked - unlocked, NULL);
 	}
 }
 
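For the SPAPR compatibility check above: the v2 TCE backend requires the
group's platform IOMMU to provide the dynamic-DMA-window table_group ops,
and v1 requires that it not, so a mismatched attach now fails with EPERM
instead of proceeding with the wrong ownership model. A hedged userspace
sketch of where that error would surface (the group path and number are
hypothetical, and error handling is abbreviated):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

int main(void)
{
	/* Open the VFIO container and an example IOMMU group. */
	int container = open("/dev/vfio/vfio", O_RDWR);
	int group = open("/dev/vfio/26", O_RDWR);	/* hypothetical group */

	if (container < 0 || group < 0)
		return 1;

	/* Bind the group to the container first... */
	if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container))
		return 1;

	/*
	 * ...then request the v2 SPAPR backend. With this merge,
	 * tce_iommu_attach_group() rejects the attach with EPERM when the
	 * group's IOMMU does not match the requested container version.
	 */
	if (ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_v2_IOMMU)) {
		perror("VFIO_SET_IOMMU");
		return 1;
	}

	return 0;
}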