author		Linus Torvalds <torvalds@linux-foundation.org>		2017-05-04 14:53:24 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>		2017-05-04 14:53:24 -0400
commit		c336bf8e658122eeab63afe0bfcb6360a381a79c
tree		fb394cb3050fcd64b72508847dca181c7eafc943
parent		a96480723c287c502b02659f4b347aecaa651ea1
parent		7cb671e7a34d73b29df77d0b53492277744e57e7
Merge tag 'vfio-v4.12-rc1' of git://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:
- Updates for SPAPR IOMMU backend including compatibility test and
memory allocation check (Alexey Kardashevskiy)
- Updates for type1 IOMMU backend to remove asynchronous locked page
accounting and remove redundancy (Alex Williamson)
* tag 'vfio-v4.12-rc1' of git://github.com/awilliam/linux-vfio:
vfio/type1: Reduce repetitive calls in vfio_pin_pages_remote()
vfio/type1: Prune vfio_pin_page_external()
vfio/type1: Remove locked page accounting workqueue
vfio/spapr_tce: Check kzalloc() return when preregistering memory
vfio/powerpc/spapr_tce: Enforce IOMMU type compatibility check
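The type1 accounting changes in this pull are exercised from userspace through the container ioctls: pages are pinned, and mm->locked_vm charged, when a DMA mapping is established. Below is a minimal sketch of that trigger path, assuming a group already bound to a VFIO driver; the group number is illustrative, and error handling plus the usual VFIO_GET_API_VERSION/VFIO_GROUP_GET_STATUS handshake are elided.

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/vfio.h>

int main(void)
{
	int container = open("/dev/vfio/vfio", O_RDWR);
	int group = open("/dev/vfio/26", O_RDWR);	/* illustrative group */
	struct vfio_iommu_type1_dma_map map = {
		.argsz = sizeof(map),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
	};

	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1v2_IOMMU);

	map.vaddr = (__u64)(unsigned long)mmap(NULL, 4096,
					       PROT_READ | PROT_WRITE,
					       MAP_PRIVATE | MAP_ANONYMOUS,
					       -1, 0);
	map.iova = 0;		/* device view of the buffer */
	map.size = 4096;

	/* this is the call that pins pages and charges locked_vm */
	return ioctl(container, VFIO_IOMMU_MAP_DMA, &map);
}

VFIO_IOMMU_MAP_DMA lands in vfio_pin_map_dma() and from there in vfio_pin_pages_remote(), the function whose accounting and error paths the patches below rework.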
 drivers/vfio/vfio_iommu_spapr_tce.c |  13
 drivers/vfio/vfio_iommu_type1.c     | 150
 2 files changed, 77 insertions(+), 86 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index cf3de91fbfe7..78dca1aa6410 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -198,6 +198,11 @@ static long tce_iommu_register_pages(struct tce_container *container,
 		return ret;
 
 	tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
+	if (!tcemem) {
+		mm_iommu_put(container->mm, mem);
+		return -ENOMEM;
+	}
+
 	tcemem->mem = mem;
 	list_add(&tcemem->next, &container->prereg_list);
 
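This hunk closes two holes at once: with kzalloc() unchecked, an allocation failure dereferenced a NULL tcemem, and the reference just taken on the preregistered memory region was never dropped (hence the mm_iommu_put() on the new error path). The ioctl that reaches tce_iommu_register_pages() on a v2 sPAPR container is VFIO_IOMMU_SPAPR_REGISTER_MEMORY; a hedged sketch of the userspace side, where preregister() is our illustrative name and container setup is assumed:

#include <sys/ioctl.h>
#include <linux/vfio.h>

int preregister(int container, void *buf, unsigned long long size)
{
	struct vfio_iommu_spapr_register_memory reg = {
		.argsz = sizeof(reg),
		.flags = 0,
		.vaddr = (__u64)(unsigned long)buf,
		.size  = size,
	};

	/* now fails cleanly with ENOMEM if the kernel allocation fails */
	return ioctl(container, VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
}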
@@ -1335,8 +1340,16 @@ static int tce_iommu_attach_group(void *iommu_data,
 
 	if (!table_group->ops || !table_group->ops->take_ownership ||
 			!table_group->ops->release_ownership) {
+		if (container->v2) {
+			ret = -EPERM;
+			goto unlock_exit;
+		}
 		ret = tce_iommu_take_ownership(container, table_group);
 	} else {
+		if (!container->v2) {
+			ret = -EPERM;
+			goto unlock_exit;
+		}
 		ret = tce_iommu_take_ownership_ddw(container, table_group);
 		if (!tce_groups_attached(container) && !container->tables[0])
 			container->def_window_pending = true;
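Here the container flavor becomes authoritative: attaching a group without dynamic-DMA-window ops to a v2 container, or a DDW-capable group to a v1 container, now fails with EPERM instead of silently taking the wrong ownership path. A sketch of the selection from userspace, assuming open container and group descriptors; set_spapr_iommu() is our helper name:

#include <sys/ioctl.h>
#include <linux/vfio.h>

static int set_spapr_iommu(int container, int group)
{
	/* prefer v2 when the kernel advertises it */
	int type = ioctl(container, VFIO_CHECK_EXTENSION,
			 VFIO_SPAPR_TCE_v2_IOMMU) > 0 ?
		   VFIO_SPAPR_TCE_v2_IOMMU : VFIO_SPAPR_TCE_IOMMU;

	if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container))
		return -1;

	/* group attach runs tce_iommu_attach_group(); EPERM on mismatch */
	return ioctl(container, VFIO_SET_IOMMU, type);
}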
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 32d2633092a3..8549cb111627 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -246,69 +246,46 @@ static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn)
 	return ret;
 }
 
-struct vwork {
-	struct mm_struct *mm;
-	long npage;
-	struct work_struct work;
-};
-
-/* delayed decrement/increment for locked_vm */
-static void vfio_lock_acct_bg(struct work_struct *work)
+static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap)
 {
-	struct vwork *vwork = container_of(work, struct vwork, work);
-	struct mm_struct *mm;
-
-	mm = vwork->mm;
-	down_write(&mm->mmap_sem);
-	mm->locked_vm += vwork->npage;
-	up_write(&mm->mmap_sem);
-	mmput(mm);
-	kfree(vwork);
-}
-
-static void vfio_lock_acct(struct task_struct *task, long npage)
-{
-	struct vwork *vwork;
 	struct mm_struct *mm;
 	bool is_current;
+	int ret;
 
 	if (!npage)
-		return;
+		return 0;
 
 	is_current = (task->mm == current->mm);
 
 	mm = is_current ? task->mm : get_task_mm(task);
 	if (!mm)
-		return; /* process exited */
+		return -ESRCH; /* process exited */
 
-	if (down_write_trylock(&mm->mmap_sem)) {
-		mm->locked_vm += npage;
-		up_write(&mm->mmap_sem);
-		if (!is_current)
-			mmput(mm);
-		return;
-	}
+	ret = down_write_killable(&mm->mmap_sem);
+	if (!ret) {
+		if (npage > 0) {
+			if (lock_cap ? !*lock_cap :
+			    !has_capability(task, CAP_IPC_LOCK)) {
+				unsigned long limit;
+
+				limit = task_rlimit(task,
+						RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+				if (mm->locked_vm + npage > limit)
+					ret = -ENOMEM;
+			}
+		}
+
+		if (!ret)
+			mm->locked_vm += npage;
 
-	if (is_current) {
-		mm = get_task_mm(task);
-		if (!mm)
-			return;
+		up_write(&mm->mmap_sem);
 	}
 
-	/*
-	 * Couldn't get mmap_sem lock, so must setup to update
-	 * mm->locked_vm later. If locked_vm were atomic, we
-	 * wouldn't need this silliness
-	 */
-	vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
-	if (WARN_ON(!vwork)) {
+	if (!is_current)
 		mmput(mm);
-		return;
-	}
-	INIT_WORK(&vwork->work, vfio_lock_acct_bg);
-	vwork->mm = mm;
-	vwork->npage = npage;
-	schedule_work(&vwork->work);
+
+	return ret;
 }
 
 /*
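The rewritten vfio_lock_acct() drops the deferred-workqueue fallback entirely: it takes mmap_sem with down_write_killable(), performs the RLIMIT_MEMLOCK check under the lock (skipped when the task holds CAP_IPC_LOCK), and reports failure to the caller instead of updating locked_vm asynchronously. The following is a runnable userspace model of just that arithmetic, not kernel code; locked_vm stands in for mm->locked_vm and account() is our name:

#include <stdio.h>
#include <unistd.h>
#include <sys/resource.h>

static int account(unsigned long *locked_vm, long npage, int lock_cap)
{
	if (npage > 0 && !lock_cap) {
		struct rlimit rl;
		unsigned long limit;

		if (getrlimit(RLIMIT_MEMLOCK, &rl))
			return -1;
		/* bytes -> pages, as RLIMIT_MEMLOCK >> PAGE_SHIFT does */
		limit = rl.rlim_cur / sysconf(_SC_PAGESIZE);

		if (*locked_vm + npage > limit)
			return -1;	/* the kernel returns -ENOMEM here */
	}
	*locked_vm += npage;		/* negative npage = unaccount */
	return 0;
}

int main(void)
{
	unsigned long locked_vm = 0;

	printf("pin 16 pages: %d, locked_vm now %lu\n",
	       account(&locked_vm, 16, 0), locked_vm);
	printf("unpin 16 pages: %d, locked_vm now %lu\n",
	       account(&locked_vm, -16, 0), locked_vm);
	return 0;
}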
@@ -403,10 +380,10 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
  * first page and all consecutive pages with the same locking.
  */
 static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
-				  long npage, unsigned long *pfn_base)
+				  long npage, unsigned long *pfn_base,
+				  bool lock_cap, unsigned long limit)
 {
-	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	bool lock_cap = capable(CAP_IPC_LOCK);
+	unsigned long pfn = 0;
 	long ret, pinned = 0, lock_acct = 0;
 	bool rsvd;
 	dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
@@ -442,8 +419,6 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 	/* Lock all the consecutive pages from pfn_base */
 	for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage;
 	     pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) {
-		unsigned long pfn = 0;
-
 		ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn);
 		if (ret)
 			break;
@@ -460,14 +435,25 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 				put_pfn(pfn, dma->prot);
 				pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
 					__func__, limit << PAGE_SHIFT);
-				break;
+				ret = -ENOMEM;
+				goto unpin_out;
 			}
 			lock_acct++;
 		}
 	}
 
 out:
-	vfio_lock_acct(current, lock_acct);
+	ret = vfio_lock_acct(current, lock_acct, &lock_cap);
+
+unpin_out:
+	if (ret) {
+		if (!rsvd) {
+			for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
+				put_pfn(pfn, dma->prot);
+		}
+
+		return ret;
+	}
 
 	return pinned;
 }
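With break replaced by goto unpin_out, a mid-stream failure no longer returns a partial pin count: every page pinned so far is put back before the error propagates. A small self-contained model of the unwind idiom, where pin_one() and put_page_ref() are hypothetical stand-ins for vaddr_get_pfn() and put_pfn():

#include <errno.h>
#include <stdio.h>

/* hypothetical stand-ins; pin_one() fails from pfn 1008 onward */
static int pin_one(unsigned long pfn)       { return pfn >= 1008 ? -ENOMEM : 0; }
static void put_page_ref(unsigned long pfn) { printf("unpin %lu\n", pfn); }

static long pin_range(unsigned long pfn_base, long npage)
{
	long ret = 0, pinned;

	for (pinned = 0; pinned < npage; pinned++) {
		ret = pin_one(pfn_base + pinned);
		if (ret)
			goto unpin_out;
	}
	return pinned;

unpin_out:
	while (pinned--)	/* release everything pinned so far */
		put_page_ref(pfn_base + pinned);
	return ret;
}

int main(void)
{
	/* fails at pfn 1008, so the 8 already-pinned pages are released */
	printf("pin_range: %ld\n", pin_range(1000, 16));
	return 0;
}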
@@ -488,7 +474,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
 	}
 
 	if (do_accounting)
-		vfio_lock_acct(dma->task, locked - unlocked);
+		vfio_lock_acct(dma->task, locked - unlocked, NULL);
 
 	return unlocked;
 }
@@ -496,37 +482,26 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
 static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
 				  unsigned long *pfn_base, bool do_accounting)
 {
-	unsigned long limit;
-	bool lock_cap = has_capability(dma->task, CAP_IPC_LOCK);
 	struct mm_struct *mm;
 	int ret;
-	bool rsvd;
 
 	mm = get_task_mm(dma->task);
 	if (!mm)
 		return -ENODEV;
 
 	ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base);
-	if (ret)
-		goto pin_page_exit;
-
-	rsvd = is_invalid_reserved_pfn(*pfn_base);
-	limit = task_rlimit(dma->task, RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-	if (!rsvd && !lock_cap && mm->locked_vm + 1 > limit) {
-		put_pfn(*pfn_base, dma->prot);
-		pr_warn("%s: Task %s (%d) RLIMIT_MEMLOCK (%ld) exceeded\n",
-			__func__, dma->task->comm, task_pid_nr(dma->task),
-			limit << PAGE_SHIFT);
-		ret = -ENOMEM;
-		goto pin_page_exit;
+	if (!ret && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
+		ret = vfio_lock_acct(dma->task, 1, NULL);
+		if (ret) {
+			put_pfn(*pfn_base, dma->prot);
+			if (ret == -ENOMEM)
+				pr_warn("%s: Task %s (%d) RLIMIT_MEMLOCK "
+					"(%ld) exceeded\n", __func__,
+					dma->task->comm, task_pid_nr(dma->task),
+					task_rlimit(dma->task, RLIMIT_MEMLOCK));
+		}
 	}
 
-	if (!rsvd && do_accounting)
-		vfio_lock_acct(dma->task, 1);
-	ret = 1;
-
-pin_page_exit:
 	mmput(mm);
 	return ret;
 }
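vfio_pin_page_external() now returns 0 or -errno rather than a count of 1, letting vfio_lock_acct() perform the limit check itself. Mediated-device vendor drivers reach this helper through the exported vfio_pin_pages() kernel API, whose contract (number of pages pinned, or -errno) is unchanged; a hedged sketch of that caller side, where vendor_pin_guest_page() is illustrative and not from the tree:

#include <linux/errno.h>
#include <linux/iommu.h>
#include <linux/vfio.h>

static int vendor_pin_guest_page(struct device *mdev_dev, unsigned long upfn,
				 unsigned long *hpfn)
{
	/* pin one page of the user's address space, get the host pfn back */
	int ret = vfio_pin_pages(mdev_dev, &upfn, 1,
				 IOMMU_READ | IOMMU_WRITE, hpfn);

	return ret == 1 ? 0 : (ret < 0 ? ret : -EFAULT);
}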
@@ -543,7 +518,7 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
 	unlocked = vfio_iova_put_vfio_pfn(dma, vpfn);
 
 	if (do_accounting)
-		vfio_lock_acct(dma->task, -unlocked);
+		vfio_lock_acct(dma->task, -unlocked, NULL);
 
 	return unlocked;
 }
@@ -606,10 +581,8 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
 		remote_vaddr = dma->vaddr + iova - dma->iova;
 		ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i],
 					     do_accounting);
-		if (ret <= 0) {
-			WARN_ON(!ret);
+		if (ret)
 			goto pin_unwind;
-		}
 
 		ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]);
 		if (ret) {
@@ -740,7 +713,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 
 	dma->iommu_mapped = false;
 	if (do_accounting) {
-		vfio_lock_acct(dma->task, -unlocked);
+		vfio_lock_acct(dma->task, -unlocked, NULL);
 		return 0;
 	}
 	return unlocked;
@@ -951,13 +924,15 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
 	unsigned long vaddr = dma->vaddr;
 	size_t size = map_size;
 	long npage;
-	unsigned long pfn;
+	unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	bool lock_cap = capable(CAP_IPC_LOCK);
 	int ret = 0;
 
 	while (size) {
 		/* Pin a contiguous chunk of memory */
 		npage = vfio_pin_pages_remote(dma, vaddr + dma->size,
-					      size >> PAGE_SHIFT, &pfn);
+					      size >> PAGE_SHIFT, &pfn,
+					      lock_cap, limit);
 		if (npage <= 0) {
 			WARN_ON(!npage);
 			ret = (int)npage;
@@ -1067,6 +1042,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 {
 	struct vfio_domain *d;
 	struct rb_node *n;
+	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	bool lock_cap = capable(CAP_IPC_LOCK);
 	int ret;
 
 	/* Arbitrarily pick the first domain in the list for lookups */
@@ -1113,7 +1090,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 
 			npage = vfio_pin_pages_remote(dma, vaddr,
 						      n >> PAGE_SHIFT,
-						      &pfn);
+						      &pfn, lock_cap,
+						      limit);
 			if (npage <= 0) {
 				WARN_ON(!npage);
 				ret = (int)npage;
@@ -1382,7 +1360,7 @@ static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
 			if (!is_invalid_reserved_pfn(vpfn->pfn))
 				locked++;
 		}
-		vfio_lock_acct(dma->task, locked - unlocked);
+		vfio_lock_acct(dma->task, locked - unlocked, NULL);
 	}
 }
 