author    Dave Airlie <airlied@redhat.com>  2018-03-28 00:49:19 -0400
committer Dave Airlie <airlied@redhat.com>  2018-03-28 00:49:19 -0400
commit    9f36f9c8eed847ee8920ecac689bbf3dd4660774 (patch)
tree      88377565adc8d3261f90f07986c3234571a05601
parent    cb17aa521e0e00db92463ad306660f3f23ae9657 (diff)
parent    1679ae8f8f4148766423066aeb3dbb0a985a373a (diff)
Merge tag 'drm-amdkfd-next-2018-03-27' of git://people.freedesktop.org/~gabbayo/linux into drm-next
- GPUVM support for dGPUs
- KFD events support for dGPUs
- Fix live-lock situation when restoring multiple evicted processes
- Fix VM page table allocation on large-bar systems
- Fix for build failure on frv architecture
* tag 'drm-amdkfd-next-2018-03-27' of git://people.freedesktop.org/~gabbayo/linux:
drm/amdkfd: Use ordered workqueue to restore processes
drm/amdgpu: Fix acquiring VM on large-BAR systems
drm/amdkfd: Add module option for testing large-BAR functionality
drm/amdkfd: Kmap event page for dGPUs
drm/amdkfd: Add ioctls for GPUVM memory management
drm/amdkfd: Add TC flush on VMID deallocation for Hawaii
drm/amdkfd: Allocate CWSR trap handler memory for dGPUs
drm/amdkfd: Add per-process IDR for buffer handles
drm/amdkfd: Aperture setup for dGPUs
drm/amdkfd: Remove limit on number of GPUs
drm/amdkfd: Populate DRM render device minor
drm/amdkfd: Create KFD VMs on demand
drm/amdgpu: Add kfd2kgd interface to acquire an existing VM
drm/amdgpu: Add helper to turn an existing VM into a compute VM
drm/amdgpu: Fix initial validation of PD BO for KFD VMs
drm/amdgpu: Move KFD-specific fields into struct amdgpu_vm
drm/amdkfd: fix uninitialized variable use
drm/amdkfd: add missing include of mm.h
19 files changed, 1398 insertions(+), 165 deletions(-)
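
For orientation, here is a hypothetical user-space sketch of the dGPU memory path these patches enable: open a DRM render node (published by the "Populate DRM render device minor" patch), hand its fd to KFD with AMDKFD_IOC_ACQUIRE_VM, then allocate and map a buffer. The argument structs and AMDKFD_IOC_* requests are the ones this series adds to the KFD uapi header; the render-node path, the flag choice, and the bare-bones error handling are illustrative assumptions, not canonical usage.

	#include <fcntl.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kfd_ioctl.h>

	/* Sketch: acquire a compute VM on one GPU and map one VRAM buffer.
	 * gpu_id comes from the KFD topology; the render-node path below is
	 * a placeholder (the real minor is also read from topology).
	 */
	static int alloc_and_map(int kfd_fd, uint32_t gpu_id,
				 uint64_t va, uint64_t size)
	{
		struct kfd_ioctl_acquire_vm_args acquire = {0};
		struct kfd_ioctl_alloc_memory_of_gpu_args alloc = {0};
		struct kfd_ioctl_map_memory_to_gpu_args map = {0};
		int drm_fd = open("/dev/dri/renderD128", O_RDWR);

		if (drm_fd < 0)
			return -1;

		/* Turn the render node's GFX VM into a KFD compute VM */
		acquire.gpu_id = gpu_id;
		acquire.drm_fd = drm_fd;
		if (ioctl(kfd_fd, AMDKFD_IOC_ACQUIRE_VM, &acquire))
			return -1;

		/* Allocate device memory at a caller-chosen virtual address */
		alloc.gpu_id = gpu_id;
		alloc.va_addr = va;
		alloc.size = size;
		alloc.flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM; /* illustrative */
		if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &alloc))
			return -1;

		/* Map it on this GPU; the handle encodes gpu_id + IDR handle */
		map.handle = alloc.handle;
		map.device_ids_array_ptr = (uint64_t)(uintptr_t)&gpu_id;
		map.n_devices = 1;
		map.n_success = 0;
		return ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &map);
	}
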
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index d7509b706b26..c2c2bea731e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -26,6 +26,7 @@
 #define AMDGPU_AMDKFD_H_INCLUDED
 
 #include <linux/types.h>
+#include <linux/mm.h>
 #include <linux/mmu_context.h>
 #include <kgd_kfd_interface.h>
 #include <drm/ttm/ttm_execbuf_util.h>
@@ -92,27 +93,6 @@ struct amdkfd_process_info {
 	struct amdgpu_amdkfd_fence *eviction_fence;
 };
 
-/* struct amdkfd_vm -
- * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs
- * belonging to a KFD process. All the VMs belonging to the same process point
- * to the same amdkfd_process_info.
- */
-struct amdkfd_vm {
-	/* Keep base as the first parameter for pointer compatibility between
-	 * amdkfd_vm and amdgpu_vm.
-	 */
-	struct amdgpu_vm base;
-
-	/* List node in amdkfd_process_info.vm_list_head*/
-	struct list_head vm_list_node;
-
-	struct amdgpu_device *adev;
-	/* Points to the KFD process VM info*/
-	struct amdkfd_process_info *process_info;
-
-	uint64_t pd_phys_addr;
-};
-
 int amdgpu_amdkfd_init(void);
 void amdgpu_amdkfd_fini(void);
 
@@ -165,6 +145,12 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
 					void **process_info,
 					struct dma_fence **ef);
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
+					struct file *filp,
+					void **vm, void **process_info,
+					struct dma_fence **ef);
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+					struct amdgpu_vm *vm);
 void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
 uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
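
The header hunk above deletes the KFD wrapper type outright. As a minimal before/after illustration of the refactor (abbreviated, not the full kernel definitions): the old code wrapped the base VM and relied on container_of() plus a "base must stay first" invariant to convert between the two pointer types; after this series the same fields are embedded in struct amdgpu_vm itself, so any VM, including one created through a DRM render node, can be turned into a compute VM in place.

	/* Before: KFD-private wrapper, recovered via container_of(). */
	struct amdkfd_vm {
		struct amdgpu_vm base;		/* had to stay first */
		struct amdkfd_process_info *process_info;
	};

	static inline struct amdkfd_vm *to_kfd_vm(struct amdgpu_vm *vm)
	{
		return container_of(vm, struct amdkfd_vm, base);
	}

	/* After: process_info, vm_list_node and pd_phys_addr live directly
	 * in struct amdgpu_vm (see the amdgpu_vm.h hunk below), so the
	 * casts and container_of() calls in amdgpu_amdkfd_gpuvm.c go away.
	 */
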
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 7485c376b90e..ea54e53172b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -205,6 +205,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_cu_info = get_cu_info,
 	.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
 	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+	.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
 	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
 	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 7be453494423..89264c9a5e9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -165,6 +165,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_cu_info = get_cu_info,
 	.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
 	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+	.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
 	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
 	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
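
Both ASIC tables gain the same new entry point, which KFD reaches through the kfd2kgd function table. A hypothetical KFD-side call site (sketch only; the pdd and process field names are assumptions for illustration, not necessarily the real ones):

	static int example_init_vm(struct kfd_process_device *pdd,
				   struct file *drm_file)
	{
		struct kfd_dev *dev = pdd->dev;
		struct kfd_process *p = pdd->process;

		/* Adopt the DRM file's GFX VM as this process's compute VM */
		return dev->kfd2kgd->acquire_process_vm(dev->kgd, drm_file,
							&pdd->vm,
							&p->kfd_process_info,
							&p->ef);
	}
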
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a12a1654e124..1d6e1479da38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -333,9 +333,9 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
  * again. Page directories are only updated after updating page
  * tables.
  */
-static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm)
+static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
 {
-	struct amdgpu_bo *pd = vm->base.root.base.bo;
+	struct amdgpu_bo *pd = vm->root.base.bo;
 	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
 	struct amdgpu_vm_parser param;
 	uint64_t addr, flags = AMDGPU_PTE_VALID;
@@ -344,7 +344,7 @@ static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm)
 	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
 	param.wait = false;
 
-	ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate,
+	ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
 					&param);
 	if (ret) {
 		pr_err("amdgpu: failed to validate PT BOs\n");
@@ -357,11 +357,11 @@ static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm)
 		return ret;
 	}
 
-	addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo);
+	addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
 	amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
 	vm->pd_phys_addr = addr;
 
-	if (vm->base.use_cpu_for_update) {
+	if (vm->use_cpu_for_update) {
 		ret = amdgpu_bo_kmap(pd, NULL);
 		if (ret) {
 			pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
@@ -415,14 +415,12 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
  * 4a. Validate new page tables and directories
  */
 static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
-		struct amdgpu_vm *avm, bool is_aql,
+		struct amdgpu_vm *vm, bool is_aql,
 		struct kfd_bo_va_list **p_bo_va_entry)
 {
 	int ret;
 	struct kfd_bo_va_list *bo_va_entry;
-	struct amdkfd_vm *kvm = container_of(avm,
-					     struct amdkfd_vm, base);
-	struct amdgpu_bo *pd = avm->root.base.bo;
+	struct amdgpu_bo *pd = vm->root.base.bo;
 	struct amdgpu_bo *bo = mem->bo;
 	uint64_t va = mem->va;
 	struct list_head *list_bo_va = &mem->bo_va_list;
@@ -441,10 +439,10 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
 		return -ENOMEM;
 
 	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
-			va + bo_size, avm);
+			va + bo_size, vm);
 
 	/* Add BO to VM internal data structures*/
-	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo);
+	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
 	if (!bo_va_entry->bo_va) {
 		ret = -EINVAL;
 		pr_err("Failed to add BO object to VM. ret == %d\n",
@@ -467,28 +465,28 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
 	 * fence, so remove it temporarily.
 	 */
 	amdgpu_amdkfd_remove_eviction_fence(pd,
-					kvm->process_info->eviction_fence,
+					vm->process_info->eviction_fence,
 					NULL, NULL);
 
-	ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo));
+	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
 	if (ret) {
 		pr_err("Failed to allocate pts, err=%d\n", ret);
 		goto err_alloc_pts;
 	}
 
-	ret = vm_validate_pt_pd_bos(kvm);
+	ret = vm_validate_pt_pd_bos(vm);
 	if (ret) {
 		pr_err("validate_pt_pd_bos() failed\n");
 		goto err_alloc_pts;
 	}
 
 	/* Add the eviction fence back */
-	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
+	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
 
 	return 0;
 
 err_alloc_pts:
-	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
+	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
 	amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
 	list_del(&bo_va_entry->bo_list);
 err_vmadd:
@@ -703,7 +701,6 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
 {
 	struct amdgpu_bo_va *bo_va = entry->bo_va;
 	struct amdgpu_vm *vm = bo_va->base.vm;
-	struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base);
 	struct amdgpu_bo *pd = vm->root.base.bo;
 
 	/* Remove eviction fence from PD (and thereby from PTs too as
@@ -713,14 +710,14 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
 	 * trigger the eviction fence.
 	 */
 	amdgpu_amdkfd_remove_eviction_fence(pd,
-					kvm->process_info->eviction_fence,
+					vm->process_info->eviction_fence,
 					NULL, NULL);
 	amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
 
 	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
 
 	/* Add the eviction fence back */
-	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
+	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
 
 	sync_vm_fence(adev, sync, bo_va->last_pt_update);
 
@@ -780,7 +777,7 @@ update_gpuvm_pte_failed:
 
 static int process_validate_vms(struct amdkfd_process_info *process_info)
 {
-	struct amdkfd_vm *peer_vm;
+	struct amdgpu_vm *peer_vm;
 	int ret;
 
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
@@ -796,12 +793,12 @@ static int process_validate_vms(struct amdkfd_process_info *process_info)
 static int process_update_pds(struct amdkfd_process_info *process_info,
 			      struct amdgpu_sync *sync)
 {
-	struct amdkfd_vm *peer_vm;
+	struct amdgpu_vm *peer_vm;
 	int ret;
 
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
 			    vm_list_node) {
-		ret = vm_update_pds(&peer_vm->base, sync);
+		ret = vm_update_pds(peer_vm, sync);
 		if (ret)
 			return ret;
 	}
@@ -809,33 +806,16 @@ static int process_update_pds(struct amdkfd_process_info *process_info,
 	return 0;
 }
 
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
-					  void **process_info,
-					  struct dma_fence **ef)
+static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
+		       struct dma_fence **ef)
 {
+	struct amdkfd_process_info *info = NULL;
 	int ret;
-	struct amdkfd_vm *new_vm;
-	struct amdkfd_process_info *info;
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
-	new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
-	if (!new_vm)
-		return -ENOMEM;
-
-	/* Initialize the VM context, allocate the page directory and zero it */
-	ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0);
-	if (ret) {
-		pr_err("Failed init vm ret %d\n", ret);
-		goto vm_init_fail;
-	}
-	new_vm->adev = adev;
 
 	if (!*process_info) {
 		info = kzalloc(sizeof(*info), GFP_KERNEL);
-		if (!info) {
-			ret = -ENOMEM;
-			goto alloc_process_info_fail;
-		}
+		if (!info)
+			return -ENOMEM;
 
 		mutex_init(&info->lock);
 		INIT_LIST_HEAD(&info->vm_list_head);
@@ -846,6 +826,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
 						   current->mm);
 		if (!info->eviction_fence) {
 			pr_err("Failed to create eviction fence\n");
+			ret = -ENOMEM;
 			goto create_evict_fence_fail;
 		}
 
@@ -853,57 +834,137 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
 		*ef = dma_fence_get(&info->eviction_fence->base);
 	}
 
-	new_vm->process_info = *process_info;
-
-	mutex_lock(&new_vm->process_info->lock);
-	list_add_tail(&new_vm->vm_list_node,
-			&(new_vm->process_info->vm_list_head));
-	new_vm->process_info->n_vms++;
-	mutex_unlock(&new_vm->process_info->lock);
+	vm->process_info = *process_info;
 
-	*vm = (void *) new_vm;
+	/* Validate page directory and attach eviction fence */
+	ret = amdgpu_bo_reserve(vm->root.base.bo, true);
+	if (ret)
+		goto reserve_pd_fail;
+	ret = vm_validate_pt_pd_bos(vm);
+	if (ret) {
+		pr_err("validate_pt_pd_bos() failed\n");
+		goto validate_pd_fail;
+	}
+	ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false);
+	if (ret)
+		goto wait_pd_fail;
+	amdgpu_bo_fence(vm->root.base.bo,
+			&vm->process_info->eviction_fence->base, true);
+	amdgpu_bo_unreserve(vm->root.base.bo);
+
+	/* Update process info */
+	mutex_lock(&vm->process_info->lock);
+	list_add_tail(&vm->vm_list_node,
+			&(vm->process_info->vm_list_head));
+	vm->process_info->n_vms++;
+	mutex_unlock(&vm->process_info->lock);
 
-	pr_debug("Created process vm %p\n", *vm);
+	return 0;
 
+wait_pd_fail:
+validate_pd_fail:
+	amdgpu_bo_unreserve(vm->root.base.bo);
+reserve_pd_fail:
+	vm->process_info = NULL;
+	if (info) {
+		/* Two fence references: one in info and one in *ef */
+		dma_fence_put(&info->eviction_fence->base);
+		dma_fence_put(*ef);
+		*ef = NULL;
+		*process_info = NULL;
+create_evict_fence_fail:
+		mutex_destroy(&info->lock);
+		kfree(info);
+	}
 	return ret;
+}
 
-create_evict_fence_fail:
-	mutex_destroy(&info->lock);
-	kfree(info);
-alloc_process_info_fail:
-	amdgpu_vm_fini(adev, &new_vm->base);
-vm_init_fail:
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
+					  void **process_info,
+					  struct dma_fence **ef)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_vm *new_vm;
+	int ret;
+
+	new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
+	if (!new_vm)
+		return -ENOMEM;
+
+	/* Initialize AMDGPU part of the VM */
+	ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
+	if (ret) {
+		pr_err("Failed init vm ret %d\n", ret);
+		goto amdgpu_vm_init_fail;
+	}
+
+	/* Initialize KFD part of the VM and process info */
+	ret = init_kfd_vm(new_vm, process_info, ef);
+	if (ret)
+		goto init_kfd_vm_fail;
+
+	*vm = (void *) new_vm;
+
+	return 0;
+
+init_kfd_vm_fail:
+	amdgpu_vm_fini(adev, new_vm);
+amdgpu_vm_init_fail:
 	kfree(new_vm);
 	return ret;
-
 }
 
-void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
+					   struct file *filp,
+					   void **vm, void **process_info,
+					   struct dma_fence **ef)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm;
-	struct amdgpu_vm *avm = &kfd_vm->base;
-	struct amdgpu_bo *pd;
-	struct amdkfd_process_info *process_info;
+	struct drm_file *drm_priv = filp->private_data;
+	struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
+	struct amdgpu_vm *avm = &drv_priv->vm;
+	int ret;
 
-	if (WARN_ON(!kgd || !vm))
+	/* Already a compute VM? */
+	if (avm->process_info)
+		return -EINVAL;
+
+	/* Convert VM into a compute VM */
+	ret = amdgpu_vm_make_compute(adev, avm);
+	if (ret)
+		return ret;
+
+	/* Initialize KFD part of the VM and process info */
+	ret = init_kfd_vm(avm, process_info, ef);
+	if (ret)
+		return ret;
+
+	*vm = (void *)avm;
+
+	return 0;
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+				    struct amdgpu_vm *vm)
+{
+	struct amdkfd_process_info *process_info = vm->process_info;
+	struct amdgpu_bo *pd = vm->root.base.bo;
+
+	if (!process_info)
 		return;
 
-	pr_debug("Destroying process vm %p\n", vm);
 	/* Release eviction fence from PD */
-	pd = avm->root.base.bo;
 	amdgpu_bo_reserve(pd, false);
 	amdgpu_bo_fence(pd, NULL, false);
 	amdgpu_bo_unreserve(pd);
 
-	process_info = kfd_vm->process_info;
-
+	/* Update process info */
 	mutex_lock(&process_info->lock);
 	process_info->n_vms--;
-	list_del(&kfd_vm->vm_list_node);
+	list_del(&vm->vm_list_node);
 	mutex_unlock(&process_info->lock);
 
-	/* Release per-process resources */
+	/* Release per-process resources when last compute VM is destroyed */
 	if (!process_info->n_vms) {
 		WARN_ON(!list_empty(&process_info->kfd_bo_list));
 
@@ -911,6 +972,17 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
 		mutex_destroy(&process_info->lock);
 		kfree(process_info);
 	}
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+	if (WARN_ON(!kgd || !vm))
+		return;
+
+	pr_debug("Destroying process vm %p\n", vm);
 
 	/* Release the VM context */
 	amdgpu_vm_fini(adev, avm);
@@ -919,7 +991,7 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
 
 uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
 {
-	struct amdkfd_vm *avm = (struct amdkfd_vm *)vm;
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
 
 	return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
 }
@@ -930,7 +1002,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	uint64_t *offset, uint32_t flags)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
 	struct amdgpu_bo *bo;
 	int byte_align;
 	u32 alloc_domain;
@@ -1010,8 +1082,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	(*mem)->va = va;
 	(*mem)->domain = alloc_domain;
 	(*mem)->mapped_to_gpu_memory = 0;
-	(*mem)->process_info = kfd_vm->process_info;
-	add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info);
+	(*mem)->process_info = avm->process_info;
+	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
 
 	if (offset)
 		*offset = amdgpu_bo_mmap_offset(bo);
@@ -1092,7 +1164,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
 	int ret;
 	struct amdgpu_bo *bo;
 	uint32_t domain;
@@ -1128,19 +1200,19 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	if (unlikely(ret))
 		goto out;
 
-	if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) {
-		ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false,
+	if (check_if_add_bo_to_vm(avm, mem)) {
+		ret = add_bo_to_vm(adev, mem, avm, false,
 				&bo_va_entry);
 		if (ret)
 			goto add_bo_to_vm_failed;
 		if (mem->aql_queue) {
-			ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm,
+			ret = add_bo_to_vm(adev, mem, avm,
 					true, &bo_va_entry_aql);
 			if (ret)
 				goto add_bo_to_vm_failed_aql;
 		}
 	} else {
-		ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm);
+		ret = vm_validate_pt_pd_bos(avm);
 		if (unlikely(ret))
 			goto add_bo_to_vm_failed;
 	}
@@ -1184,7 +1256,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 
 	if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count)
 		amdgpu_bo_fence(bo,
-			&kfd_vm->process_info->eviction_fence->base,
+			&avm->process_info->eviction_fence->base,
 			true);
 	ret = unreserve_bo_and_vms(&ctx, false, false);
 
@@ -1209,7 +1281,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdkfd_process_info *process_info =
-		((struct amdkfd_vm *)vm)->process_info;
+		((struct amdgpu_vm *)vm)->process_info;
 	unsigned long bo_size = mem->bo->tbo.mem.size;
 	struct kfd_bo_va_list *entry;
 	struct bo_vm_reservation_context ctx;
@@ -1226,7 +1298,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 		goto unreserve_out;
 	}
 
-	ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm);
+	ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
 	if (unlikely(ret))
 		goto unreserve_out;
 
@@ -1368,7 +1440,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 {
 	struct amdgpu_bo_list_entry *pd_bo_list;
 	struct amdkfd_process_info *process_info = info;
-	struct amdkfd_vm *peer_vm;
+	struct amdgpu_vm *peer_vm;
 	struct kgd_mem *mem;
 	struct bo_vm_reservation_context ctx;
 	struct amdgpu_amdkfd_fence *new_fence;
@@ -1390,8 +1462,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	mutex_lock(&process_info->lock);
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
 			vm_list_node)
-		amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list,
-				    &pd_bo_list[i++]);
+		amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);
 
 	/* Reserve all BOs and page tables/directory. Add all BOs from
 	 * kfd_bo_list to ctx.list
@@ -1422,7 +1493,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	/* FIXME: I think this isn't needed */
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
 			vm_list_node) {
-		struct amdgpu_bo *bo = peer_vm->base.root.base.bo;
+		struct amdgpu_bo *bo = peer_vm->root.base.bo;
 
 		ttm_bo_wait(&bo->tbo, false, false);
 	}
@@ -1491,7 +1562,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	/* Attach eviction fence to PD / PT BOs */
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
 			vm_list_node) {
-		struct amdgpu_bo *bo = peer_vm->base.root.base.bo;
+		struct amdgpu_bo *bo = peer_vm->root.base.bo;
 
 		amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 24474294c92a..da55a78d7380 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -32,6 +32,7 @@
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 
 /*
  * GPUVM
@@ -2405,8 +2406,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (vm->use_cpu_for_update)
 		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 	else
-		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-				AMDGPU_GEM_CREATE_SHADOW);
+		flags |= AMDGPU_GEM_CREATE_SHADOW;
 
 	size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
 	r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
@@ -2462,6 +2462,73 @@ error_free_sched_entity:
 }
 
 /**
+ * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
+ *
+ * This only works on GFX VMs that don't have any BOs added and no
+ * page tables allocated yet.
+ *
+ * Changes the following VM parameters:
+ * - use_cpu_for_update
+ * - pte_supports_ats
+ * - pasid (old PASID is released, because compute manages its own PASIDs)
+ *
+ * Reinitializes the page directory to reflect the changed ATS
+ * setting. May leave behind an unused shadow BO for the page
+ * directory when switching from SDMA updates to CPU updates.
+ *
+ * Returns 0 for success, -errno for errors.
+ */
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+	bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
+	int r;
+
+	r = amdgpu_bo_reserve(vm->root.base.bo, true);
+	if (r)
+		return r;
+
+	/* Sanity checks */
+	if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
+		r = -EINVAL;
+		goto error;
+	}
+
+	/* Check if PD needs to be reinitialized and do it before
+	 * changing any other state, in case it fails.
+	 */
+	if (pte_support_ats != vm->pte_support_ats) {
+		r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+			       adev->vm_manager.root_level,
+			       pte_support_ats);
+		if (r)
+			goto error;
+	}
+
+	/* Update VM state */
+	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+				    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+	vm->pte_support_ats = pte_support_ats;
+	DRM_DEBUG_DRIVER("VM update mode is %s\n",
+			 vm->use_cpu_for_update ? "CPU" : "SDMA");
+	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+		  "CPU update of VM recommended only for large BAR system\n");
+
+	if (vm->pasid) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
+		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+
+		vm->pasid = 0;
+	}
+
+error:
+	amdgpu_bo_unreserve(vm->root.base.bo);
+	return r;
+}
+
+/**
  * amdgpu_vm_free_levels - free PD/PT levels
  *
  * @adev: amdgpu device structure
@@ -2508,6 +2575,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	u64 fault;
 	int i, r;
 
+	amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
+
 	/* Clear pending page faults from IH when the VM is destroyed */
 	while (kfifo_get(&vm->faults, &fault))
 		amdgpu_ih_clear_fault(adev, fault);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index e9841518343e..30f080364c97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -207,6 +207,15 @@ struct amdgpu_vm {
 
 	/* Limit non-retry fault storms */
 	unsigned int fault_credit;
+
+	/* Points to the KFD process VM info */
+	struct amdkfd_process_info *process_info;
+
+	/* List node in amdkfd_process_info.vm_list_head */
+	struct list_head vm_list_node;
+
+	/* Valid while the PD is reserved or fenced */
+	uint64_t pd_phys_addr;
 };
 
 struct amdgpu_vm_manager {
@@ -251,6 +260,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		   int vm_context, unsigned int pasid);
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
 				  unsigned int pasid);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6fe24964540b..cd679cf1fd30 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/err.h>
 #include <linux/fs.h>
+#include <linux/file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
@@ -825,12 +826,155 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
 	return 0;
 }
 
+static int kfd_ioctl_get_process_apertures_new(struct file *filp,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_process_apertures_new_args *args = data;
+	struct kfd_process_device_apertures *pa;
+	struct kfd_process_device *pdd;
+	uint32_t nodes = 0;
+	int ret;
+
+	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+
+	if (args->num_of_nodes == 0) {
+		/* Return number of nodes, so that user space can alloacate
+		 * sufficient memory
+		 */
+		mutex_lock(&p->mutex);
+
+		if (!kfd_has_process_device_data(p))
+			goto out_unlock;
+
+		/* Run over all pdd of the process */
+		pdd = kfd_get_first_process_device_data(p);
+		do {
+			args->num_of_nodes++;
+			pdd = kfd_get_next_process_device_data(p, pdd);
+		} while (pdd);
+
+		goto out_unlock;
+	}
+
+	/* Fill in process-aperture information for all available
+	 * nodes, but not more than args->num_of_nodes as that is
+	 * the amount of memory allocated by user
+	 */
+	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
+			args->num_of_nodes), GFP_KERNEL);
+	if (!pa)
+		return -ENOMEM;
+
+	mutex_lock(&p->mutex);
+
+	if (!kfd_has_process_device_data(p)) {
+		args->num_of_nodes = 0;
+		kfree(pa);
+		goto out_unlock;
+	}
+
+	/* Run over all pdd of the process */
+	pdd = kfd_get_first_process_device_data(p);
+	do {
+		pa[nodes].gpu_id = pdd->dev->id;
+		pa[nodes].lds_base = pdd->lds_base;
+		pa[nodes].lds_limit = pdd->lds_limit;
+		pa[nodes].gpuvm_base = pdd->gpuvm_base;
+		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
+		pa[nodes].scratch_base = pdd->scratch_base;
+		pa[nodes].scratch_limit = pdd->scratch_limit;
+
+		dev_dbg(kfd_device,
+			"gpu id %u\n", pdd->dev->id);
+		dev_dbg(kfd_device,
+			"lds_base %llX\n", pdd->lds_base);
+		dev_dbg(kfd_device,
+			"lds_limit %llX\n", pdd->lds_limit);
+		dev_dbg(kfd_device,
+			"gpuvm_base %llX\n", pdd->gpuvm_base);
+		dev_dbg(kfd_device,
+			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
+		dev_dbg(kfd_device,
+			"scratch_base %llX\n", pdd->scratch_base);
+		dev_dbg(kfd_device,
+			"scratch_limit %llX\n", pdd->scratch_limit);
+		nodes++;
+
+		pdd = kfd_get_next_process_device_data(p, pdd);
+	} while (pdd && (nodes < args->num_of_nodes));
+	mutex_unlock(&p->mutex);
+
+	args->num_of_nodes = nodes;
+	ret = copy_to_user(
+			(void __user *)args->kfd_process_device_apertures_ptr,
+			pa,
+			(nodes * sizeof(struct kfd_process_device_apertures)));
+	kfree(pa);
+	return ret ? -EFAULT : 0;
+
+out_unlock:
+	mutex_unlock(&p->mutex);
+	return 0;
+}
+
 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
 				void *data)
 {
 	struct kfd_ioctl_create_event_args *args = data;
 	int err;
 
+	/* For dGPUs the event page is allocated in user mode. The
+	 * handle is passed to KFD with the first call to this IOCTL
+	 * through the event_page_offset field.
+	 */
+	if (args->event_page_offset) {
+		struct kfd_dev *kfd;
+		struct kfd_process_device *pdd;
+		void *mem, *kern_addr;
+		uint64_t size;
+
+		if (p->signal_page) {
+			pr_err("Event page is already set\n");
+			return -EINVAL;
+		}
+
+		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
+		if (!kfd) {
+			pr_err("Getting device by id failed in %s\n", __func__);
+			return -EINVAL;
+		}
+
+		mutex_lock(&p->mutex);
+		pdd = kfd_bind_process_to_device(kfd, p);
+		if (IS_ERR(pdd)) {
+			err = PTR_ERR(pdd);
+			goto out_unlock;
+		}
+
+		mem = kfd_process_device_translate_handle(pdd,
+				GET_IDR_HANDLE(args->event_page_offset));
+		if (!mem) {
+			pr_err("Can't find BO, offset is 0x%llx\n",
+			       args->event_page_offset);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		mutex_unlock(&p->mutex);
+
+		err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
+						mem, &kern_addr, &size);
+		if (err) {
+			pr_err("Failed to map event page to kernel\n");
+			return err;
+		}
+
+		err = kfd_event_page_set(p, kern_addr, size);
+		if (err) {
+			pr_err("Failed to set event page\n");
+			return err;
+		}
+	}
+
 	err = kfd_event_create(filp, p, args->event_type,
 			args->auto_reset != 0, args->node_id,
 			&args->event_id, &args->event_trigger_data,
@@ -838,6 +982,10 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
 			&args->event_slot_index);
 
 	return err;
+
+out_unlock:
+	mutex_unlock(&p->mutex);
+	return err;
 }
 
 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
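
The event-page handshake above has a matching user-space step: on dGPUs the signal page is an ordinary GTT allocation whose handle is passed once through event_page_offset on the first event-create call. A hedged sketch, under the same uapi assumptions as the earlier example (gtt_handle stands for the handle returned by AMDKFD_IOC_ALLOC_MEMORY_OF_GPU for a GTT buffer; it already encodes the GPU ID and per-process IDR handle that the kernel unpacks with GET_GPU_ID()/GET_IDR_HANDLE()):

	struct kfd_ioctl_create_event_args ev = {0};

	ev.event_page_offset = gtt_handle;	/* only on the first call */
	ev.event_type = KFD_IOC_EVENT_SIGNAL;
	ev.auto_reset = 1;
	if (ioctl(kfd_fd, AMDKFD_IOC_CREATE_EVENT, &ev))
		return -1;
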
| @@ -955,6 +1103,371 @@ static int kfd_ioctl_get_tile_config(struct file *filep, | |||
| 955 | return 0; | 1103 | return 0; |
| 956 | } | 1104 | } |
| 957 | 1105 | ||
| 1106 | static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p, | ||
| 1107 | void *data) | ||
| 1108 | { | ||
| 1109 | struct kfd_ioctl_acquire_vm_args *args = data; | ||
| 1110 | struct kfd_process_device *pdd; | ||
| 1111 | struct kfd_dev *dev; | ||
| 1112 | struct file *drm_file; | ||
| 1113 | int ret; | ||
| 1114 | |||
| 1115 | dev = kfd_device_by_id(args->gpu_id); | ||
| 1116 | if (!dev) | ||
| 1117 | return -EINVAL; | ||
| 1118 | |||
| 1119 | drm_file = fget(args->drm_fd); | ||
| 1120 | if (!drm_file) | ||
| 1121 | return -EINVAL; | ||
| 1122 | |||
| 1123 | mutex_lock(&p->mutex); | ||
| 1124 | |||
| 1125 | pdd = kfd_get_process_device_data(dev, p); | ||
| 1126 | if (!pdd) { | ||
| 1127 | ret = -EINVAL; | ||
| 1128 | goto err_unlock; | ||
| 1129 | } | ||
| 1130 | |||
| 1131 | if (pdd->drm_file) { | ||
| 1132 | ret = pdd->drm_file == drm_file ? 0 : -EBUSY; | ||
| 1133 | goto err_unlock; | ||
| 1134 | } | ||
| 1135 | |||
| 1136 | ret = kfd_process_device_init_vm(pdd, drm_file); | ||
| 1137 | if (ret) | ||
| 1138 | goto err_unlock; | ||
| 1139 | /* On success, the PDD keeps the drm_file reference */ | ||
| 1140 | mutex_unlock(&p->mutex); | ||
| 1141 | |||
| 1142 | return 0; | ||
| 1143 | |||
| 1144 | err_unlock: | ||
| 1145 | mutex_unlock(&p->mutex); | ||
| 1146 | fput(drm_file); | ||
| 1147 | return ret; | ||
| 1148 | } | ||
| 1149 | |||
| 1150 | bool kfd_dev_is_large_bar(struct kfd_dev *dev) | ||
| 1151 | { | ||
| 1152 | struct kfd_local_mem_info mem_info; | ||
| 1153 | |||
| 1154 | if (debug_largebar) { | ||
| 1155 | pr_debug("Simulate large-bar allocation on non large-bar machine\n"); | ||
| 1156 | return true; | ||
| 1157 | } | ||
| 1158 | |||
| 1159 | if (dev->device_info->needs_iommu_device) | ||
| 1160 | return false; | ||
| 1161 | |||
| 1162 | dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info); | ||
| 1163 | if (mem_info.local_mem_size_private == 0 && | ||
| 1164 | mem_info.local_mem_size_public > 0) | ||
| 1165 | return true; | ||
| 1166 | return false; | ||
| 1167 | } | ||
| 1168 | |||
| 1169 | static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, | ||
| 1170 | struct kfd_process *p, void *data) | ||
| 1171 | { | ||
| 1172 | struct kfd_ioctl_alloc_memory_of_gpu_args *args = data; | ||
| 1173 | struct kfd_process_device *pdd; | ||
| 1174 | void *mem; | ||
| 1175 | struct kfd_dev *dev; | ||
| 1176 | int idr_handle; | ||
| 1177 | long err; | ||
| 1178 | uint64_t offset = args->mmap_offset; | ||
| 1179 | uint32_t flags = args->flags; | ||
| 1180 | |||
| 1181 | if (args->size == 0) | ||
| 1182 | return -EINVAL; | ||
| 1183 | |||
| 1184 | dev = kfd_device_by_id(args->gpu_id); | ||
| 1185 | if (!dev) | ||
| 1186 | return -EINVAL; | ||
| 1187 | |||
| 1188 | if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) && | ||
| 1189 | (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) && | ||
| 1190 | !kfd_dev_is_large_bar(dev)) { | ||
| 1191 | pr_err("Alloc host visible vram on small bar is not allowed\n"); | ||
| 1192 | return -EINVAL; | ||
| 1193 | } | ||
| 1194 | |||
| 1195 | mutex_lock(&p->mutex); | ||
| 1196 | |||
| 1197 | pdd = kfd_bind_process_to_device(dev, p); | ||
| 1198 | if (IS_ERR(pdd)) { | ||
| 1199 | err = PTR_ERR(pdd); | ||
| 1200 | goto err_unlock; | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | err = dev->kfd2kgd->alloc_memory_of_gpu( | ||
| 1204 | dev->kgd, args->va_addr, args->size, | ||
| 1205 | pdd->vm, (struct kgd_mem **) &mem, &offset, | ||
| 1206 | flags); | ||
| 1207 | |||
| 1208 | if (err) | ||
| 1209 | goto err_unlock; | ||
| 1210 | |||
| 1211 | idr_handle = kfd_process_device_create_obj_handle(pdd, mem); | ||
| 1212 | if (idr_handle < 0) { | ||
| 1213 | err = -EFAULT; | ||
| 1214 | goto err_free; | ||
| 1215 | } | ||
| 1216 | |||
| 1217 | mutex_unlock(&p->mutex); | ||
| 1218 | |||
| 1219 | args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); | ||
| 1220 | args->mmap_offset = offset; | ||
| 1221 | |||
| 1222 | return 0; | ||
| 1223 | |||
| 1224 | err_free: | ||
| 1225 | dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); | ||
| 1226 | err_unlock: | ||
| 1227 | mutex_unlock(&p->mutex); | ||
| 1228 | return err; | ||
| 1229 | } | ||
| 1230 | |||
| 1231 | static int kfd_ioctl_free_memory_of_gpu(struct file *filep, | ||
| 1232 | struct kfd_process *p, void *data) | ||
| 1233 | { | ||
| 1234 | struct kfd_ioctl_free_memory_of_gpu_args *args = data; | ||
| 1235 | struct kfd_process_device *pdd; | ||
| 1236 | void *mem; | ||
| 1237 | struct kfd_dev *dev; | ||
| 1238 | int ret; | ||
| 1239 | |||
| 1240 | dev = kfd_device_by_id(GET_GPU_ID(args->handle)); | ||
| 1241 | if (!dev) | ||
| 1242 | return -EINVAL; | ||
| 1243 | |||
| 1244 | mutex_lock(&p->mutex); | ||
| 1245 | |||
| 1246 | pdd = kfd_get_process_device_data(dev, p); | ||
| 1247 | if (!pdd) { | ||
| 1248 | pr_err("Process device data doesn't exist\n"); | ||
| 1249 | ret = -EINVAL; | ||
| 1250 | goto err_unlock; | ||
| 1251 | } | ||
| 1252 | |||
| 1253 | mem = kfd_process_device_translate_handle( | ||
| 1254 | pdd, GET_IDR_HANDLE(args->handle)); | ||
| 1255 | if (!mem) { | ||
| 1256 | ret = -EINVAL; | ||
| 1257 | goto err_unlock; | ||
| 1258 | } | ||
| 1259 | |||
| 1260 | ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); | ||
| 1261 | |||
| 1262 | /* If freeing the buffer failed, leave the handle in place for | ||
| 1263 | * clean-up during process tear-down. | ||
| 1264 | */ | ||
| 1265 | if (!ret) | ||
| 1266 | kfd_process_device_remove_obj_handle( | ||
| 1267 | pdd, GET_IDR_HANDLE(args->handle)); | ||
| 1268 | |||
| 1269 | err_unlock: | ||
| 1270 | mutex_unlock(&p->mutex); | ||
| 1271 | return ret; | ||
| 1272 | } | ||
| 1273 | |||
| 1274 | static int kfd_ioctl_map_memory_to_gpu(struct file *filep, | ||
| 1275 | struct kfd_process *p, void *data) | ||
| 1276 | { | ||
| 1277 | struct kfd_ioctl_map_memory_to_gpu_args *args = data; | ||
| 1278 | struct kfd_process_device *pdd, *peer_pdd; | ||
| 1279 | void *mem; | ||
| 1280 | struct kfd_dev *dev, *peer; | ||
| 1281 | long err = 0; | ||
| 1282 | int i; | ||
| 1283 | uint32_t *devices_arr = NULL; | ||
| 1284 | |||
| 1285 | dev = kfd_device_by_id(GET_GPU_ID(args->handle)); | ||
| 1286 | if (!dev) | ||
| 1287 | return -EINVAL; | ||
| 1288 | |||
| 1289 | if (!args->n_devices) { | ||
| 1290 | pr_debug("Device IDs array empty\n"); | ||
| 1291 | return -EINVAL; | ||
| 1292 | } | ||
| 1293 | if (args->n_success > args->n_devices) { | ||
| 1294 | pr_debug("n_success exceeds n_devices\n"); | ||
| 1295 | return -EINVAL; | ||
| 1296 | } | ||
| 1297 | |||
| 1298 | devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), | ||
| 1299 | GFP_KERNEL); | ||
| 1300 | if (!devices_arr) | ||
| 1301 | return -ENOMEM; | ||
| 1302 | |||
| 1303 | err = copy_from_user(devices_arr, | ||
| 1304 | (void __user *)args->device_ids_array_ptr, | ||
| 1305 | args->n_devices * sizeof(*devices_arr)); | ||
| 1306 | if (err != 0) { | ||
| 1307 | err = -EFAULT; | ||
| 1308 | goto copy_from_user_failed; | ||
| 1309 | } | ||
| 1310 | |||
| 1311 | mutex_lock(&p->mutex); | ||
| 1312 | |||
| 1313 | pdd = kfd_bind_process_to_device(dev, p); | ||
| 1314 | if (IS_ERR(pdd)) { | ||
| 1315 | err = PTR_ERR(pdd); | ||
| 1316 | goto bind_process_to_device_failed; | ||
| 1317 | } | ||
| 1318 | |||
| 1319 | mem = kfd_process_device_translate_handle(pdd, | ||
| 1320 | GET_IDR_HANDLE(args->handle)); | ||
| 1321 | if (!mem) { | ||
| 1322 | err = -ENOMEM; | ||
| 1323 | goto get_mem_obj_from_handle_failed; | ||
| 1324 | } | ||
| 1325 | |||
| 1326 | for (i = args->n_success; i < args->n_devices; i++) { | ||
| 1327 | peer = kfd_device_by_id(devices_arr[i]); | ||
| 1328 | if (!peer) { | ||
| 1329 | pr_debug("Getting device by id failed for 0x%x\n", | ||
| 1330 | devices_arr[i]); | ||
| 1331 | err = -EINVAL; | ||
| 1332 | goto get_mem_obj_from_handle_failed; | ||
| 1333 | } | ||
| 1334 | |||
| 1335 | peer_pdd = kfd_bind_process_to_device(peer, p); | ||
| 1336 | if (IS_ERR(peer_pdd)) { | ||
| 1337 | err = PTR_ERR(peer_pdd); | ||
| 1338 | goto get_mem_obj_from_handle_failed; | ||
| 1339 | } | ||
| 1340 | err = peer->kfd2kgd->map_memory_to_gpu( | ||
| 1341 | peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); | ||
| 1342 | if (err) { | ||
| 1343 | pr_err("Failed to map to gpu %d/%d\n", | ||
| 1344 | i, args->n_devices); | ||
| 1345 | goto map_memory_to_gpu_failed; | ||
| 1346 | } | ||
| 1347 | args->n_success = i + 1; | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | mutex_unlock(&p->mutex); | ||
| 1351 | |||
| 1352 | err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *)mem, true); | ||
| 1353 | if (err) { | ||
| 1354 | pr_debug("Sync memory failed, wait interrupted by user signal\n"); | ||
| 1355 | goto sync_memory_failed; | ||
| 1356 | } | ||
| 1357 | |||
| 1358 | /* Flush TLBs after waiting for the page table updates to complete */ | ||
| 1359 | for (i = 0; i < args->n_devices; i++) { | ||
| 1360 | peer = kfd_device_by_id(devices_arr[i]); | ||
| 1361 | if (WARN_ON_ONCE(!peer)) | ||
| 1362 | continue; | ||
| 1363 | peer_pdd = kfd_get_process_device_data(peer, p); | ||
| 1364 | if (WARN_ON_ONCE(!peer_pdd)) | ||
| 1365 | continue; | ||
| 1366 | kfd_flush_tlb(peer_pdd); | ||
| 1367 | } | ||
| 1368 | |||
| 1369 | kfree(devices_arr); | ||
| 1370 | |||
| 1371 | return err; | ||
| 1372 | |||
| 1373 | bind_process_to_device_failed: | ||
| 1374 | get_mem_obj_from_handle_failed: | ||
| 1375 | map_memory_to_gpu_failed: | ||
| 1376 | mutex_unlock(&p->mutex); | ||
| 1377 | copy_from_user_failed: | ||
| 1378 | sync_memory_failed: | ||
| 1379 | kfree(devices_arr); | ||
| 1380 | |||
| 1381 | return err; | ||
| 1382 | } | ||
| 1383 | |||
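Note the n_success contract: the mapping loop starts at args->n_success and updates it after every device that maps successfully, so user mode can resume a partially completed map instead of starting over. A hedged user-space sketch of that retry pattern (not the actual Thunk code; kfd_fd, handle and gpu_ids come from earlier calls):

    #include <errno.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kfd_ioctl.h>

    /* Hypothetical retry loop: the kernel resumes its mapping loop at
     * n_success, so retrying after an interrupted wait does not remap
     * the devices that already succeeded. */
    static int kfd_map_to_gpus(int kfd_fd, uint64_t handle,
                               uint32_t *gpu_ids, uint32_t n_devices)
    {
        struct kfd_ioctl_map_memory_to_gpu_args args;
        int ret;

        memset(&args, 0, sizeof(args));
        args.handle = handle;
        args.device_ids_array_ptr = (uint64_t)(uintptr_t)gpu_ids;
        args.n_devices = n_devices;
        args.n_success = 0;

        do {
            ret = ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &args);
        } while (ret == -1 && errno == EINTR);

        return ret;
    }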
| 1384 | static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, | ||
| 1385 | struct kfd_process *p, void *data) | ||
| 1386 | { | ||
| 1387 | struct kfd_ioctl_unmap_memory_from_gpu_args *args = data; | ||
| 1388 | struct kfd_process_device *pdd, *peer_pdd; | ||
| 1389 | void *mem; | ||
| 1390 | struct kfd_dev *dev, *peer; | ||
| 1391 | long err = 0; | ||
| 1392 | uint32_t *devices_arr = NULL, i; | ||
| 1393 | |||
| 1394 | dev = kfd_device_by_id(GET_GPU_ID(args->handle)); | ||
| 1395 | if (!dev) | ||
| 1396 | return -EINVAL; | ||
| 1397 | |||
| 1398 | if (!args->n_devices) { | ||
| 1399 | pr_debug("Device IDs array empty\n"); | ||
| 1400 | return -EINVAL; | ||
| 1401 | } | ||
| 1402 | if (args->n_success > args->n_devices) { | ||
| 1403 | pr_debug("n_success exceeds n_devices\n"); | ||
| 1404 | return -EINVAL; | ||
| 1405 | } | ||
| 1406 | |||
| 1407 | devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), | ||
| 1408 | GFP_KERNEL); | ||
| 1409 | if (!devices_arr) | ||
| 1410 | return -ENOMEM; | ||
| 1411 | |||
| 1412 | err = copy_from_user(devices_arr, | ||
| 1413 | (void __user *)args->device_ids_array_ptr, | ||
| 1414 | args->n_devices * sizeof(*devices_arr)); | ||
| 1415 | if (err != 0) { | ||
| 1416 | err = -EFAULT; | ||
| 1417 | goto copy_from_user_failed; | ||
| 1418 | } | ||
| 1419 | |||
| 1420 | mutex_lock(&p->mutex); | ||
| 1421 | |||
| 1422 | pdd = kfd_get_process_device_data(dev, p); | ||
| 1423 | if (!pdd) { | ||
| 1424 | err = -EINVAL; | ||
| 1425 | goto get_process_device_data_failed; | ||
| 1426 | } | ||
| 1427 | |||
| 1428 | mem = kfd_process_device_translate_handle(pdd, | ||
| 1429 | GET_IDR_HANDLE(args->handle)); | ||
| 1430 | if (!mem) { | ||
| 1431 | err = -ENOMEM; | ||
| 1432 | goto get_mem_obj_from_handle_failed; | ||
| 1433 | } | ||
| 1434 | |||
| 1435 | for (i = args->n_success; i < args->n_devices; i++) { | ||
| 1436 | peer = kfd_device_by_id(devices_arr[i]); | ||
| 1437 | if (!peer) { | ||
| 1438 | err = -EINVAL; | ||
| 1439 | goto get_mem_obj_from_handle_failed; | ||
| 1440 | } | ||
| 1441 | |||
| 1442 | peer_pdd = kfd_get_process_device_data(peer, p); | ||
| 1443 | if (!peer_pdd) { | ||
| 1444 | err = -ENODEV; | ||
| 1445 | goto get_mem_obj_from_handle_failed; | ||
| 1446 | } | ||
| 1447 | err = peer->kfd2kgd->unmap_memory_to_gpu( | ||
| 1448 | peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); | ||
| 1449 | if (err) { | ||
| 1450 | pr_err("Failed to unmap from gpu %d/%d\n", | ||
| 1451 | i, args->n_devices); | ||
| 1452 | goto unmap_memory_from_gpu_failed; | ||
| 1453 | } | ||
| 1454 | args->n_success = i + 1; | ||
| 1455 | } | ||
| 1456 | kfree(devices_arr); | ||
| 1457 | |||
| 1458 | mutex_unlock(&p->mutex); | ||
| 1459 | |||
| 1460 | return 0; | ||
| 1461 | |||
| 1462 | get_process_device_data_failed: | ||
| 1463 | get_mem_obj_from_handle_failed: | ||
| 1464 | unmap_memory_from_gpu_failed: | ||
| 1465 | mutex_unlock(&p->mutex); | ||
| 1466 | copy_from_user_failed: | ||
| 1467 | kfree(devices_arr); | ||
| 1468 | return err; | ||
| 1469 | } | ||
| 1470 | |||
| 958 | #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ | 1471 | #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ |
| 959 | [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ | 1472 | [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ |
| 960 | .cmd_drv = 0, .name = #ioctl} | 1473 | .cmd_drv = 0, .name = #ioctl} |
| @@ -1017,6 +1530,25 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { | |||
| 1017 | 1530 | ||
| 1018 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER, | 1531 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER, |
| 1019 | kfd_ioctl_set_trap_handler, 0), | 1532 | kfd_ioctl_set_trap_handler, 0), |
| 1533 | |||
| 1534 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, | ||
| 1535 | kfd_ioctl_get_process_apertures_new, 0), | ||
| 1536 | |||
| 1537 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM, | ||
| 1538 | kfd_ioctl_acquire_vm, 0), | ||
| 1539 | |||
| 1540 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, | ||
| 1541 | kfd_ioctl_alloc_memory_of_gpu, 0), | ||
| 1542 | |||
| 1543 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU, | ||
| 1544 | kfd_ioctl_free_memory_of_gpu, 0), | ||
| 1545 | |||
| 1546 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU, | ||
| 1547 | kfd_ioctl_map_memory_to_gpu, 0), | ||
| 1548 | |||
| 1549 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, | ||
| 1550 | kfd_ioctl_unmap_memory_from_gpu, 0), | ||
| 1551 | |||
| 1020 | }; | 1552 | }; |
| 1021 | 1553 | ||
| 1022 | #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) | 1554 | #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) |
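For context, kfd_ioctl() itself is not part of this hunk; a simplified reconstruction of how the table is consumed shows why AMDKFD_IOCTL_DEF fills the array with a designated initializer keyed on _IOC_NR (a sketch, assuming the amdkfd_ioctl_t func pointer from kfd_priv.h):

    /* Simplified reconstruction, not from this patch: the command
     * number's _IOC_NR indexes amdkfd_ioctls[] directly. */
    static const struct amdkfd_ioctl_desc *lookup_ioctl(unsigned int cmd)
    {
        unsigned int nr = _IOC_NR(cmd);

        if (nr >= AMDKFD_CORE_IOCTL_COUNT || !amdkfd_ioctls[nr].func)
            return NULL;

        return &amdkfd_ioctls[nr];
    }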
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 7493f47e7fe1..4f126ef6139b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c | |||
| @@ -882,7 +882,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) | |||
| 882 | crat_table->length = sizeof(struct crat_header); | 882 | crat_table->length = sizeof(struct crat_header); |
| 883 | 883 | ||
| 884 | status = acpi_get_table("DSDT", 0, &acpi_table); | 884 | status = acpi_get_table("DSDT", 0, &acpi_table); |
| 885 | if (status == AE_NOT_FOUND) | 885 | if (status != AE_OK) |
| 886 | pr_warn("DSDT table not found for OEM information\n"); | 886 | pr_warn("DSDT table not found for OEM information\n"); |
| 887 | else { | 887 | else { |
| 888 | crat_table->oem_revision = acpi_table->revision; | 888 | crat_table->oem_revision = acpi_table->revision; |
| @@ -1117,6 +1117,9 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, | |||
| 1117 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | 1117 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + |
| 1118 | sub_type_hdr->length); | 1118 | sub_type_hdr->length); |
| 1119 | 1119 | ||
| 1120 | if (debug_largebar) | ||
| 1121 | local_mem_info.local_mem_size_private = 0; | ||
| 1122 | |||
| 1120 | if (local_mem_info.local_mem_size_private == 0) | 1123 | if (local_mem_info.local_mem_size_private == 0) |
| 1121 | ret = kfd_fill_gpu_memory_affinity(&avail_size, | 1124 | ret = kfd_fill_gpu_memory_affinity(&avail_size, |
| 1122 | kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC, | 1125 | kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC, |
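The debug_largebar hook is easy to misread at a glance: forcing local_mem_size_private to 0 makes the branch below it report the entire framebuffer as a public (host-accessible) heap. A small stand-alone illustration of that decision, with names borrowed from the hunk:

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustration only: with debug_largebar set, the private size is
     * zeroed first, so the FB_PUBLIC branch is always taken and the
     * whole framebuffer is reported as host-accessible. */
    static bool fb_reported_public(uint64_t local_mem_size_private,
                                   bool debug_largebar)
    {
        if (debug_largebar)
            local_mem_size_private = 0;

        return local_mem_size_private == 0;
    }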
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index b3b6dab71638..c18e048f23c6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | |||
| @@ -142,12 +142,31 @@ static int allocate_vmid(struct device_queue_manager *dqm, | |||
| 142 | return 0; | 142 | return 0; |
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, | ||
| 146 | struct qcm_process_device *qpd) | ||
| 147 | { | ||
| 148 | uint32_t len; | ||
| 149 | |||
| 150 | if (!qpd->ib_kaddr) | ||
| 151 | return -ENOMEM; | ||
| 152 | |||
| 153 | len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); | ||
| 154 | |||
| 155 | return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, | ||
| 156 | qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len); | ||
| 157 | } | ||
| 158 | |||
| 145 | static void deallocate_vmid(struct device_queue_manager *dqm, | 159 | static void deallocate_vmid(struct device_queue_manager *dqm, |
| 146 | struct qcm_process_device *qpd, | 160 | struct qcm_process_device *qpd, |
| 147 | struct queue *q) | 161 | struct queue *q) |
| 148 | { | 162 | { |
| 149 | int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; | 163 | int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; |
| 150 | 164 | ||
| 165 | /* On GFX v7, CP doesn't flush TC at dequeue */ | ||
| 166 | if (q->device->device_info->asic_family == CHIP_HAWAII) | ||
| 167 | if (flush_texture_cache_nocpsch(q->device, qpd)) | ||
| 168 | pr_err("Failed to flush TC\n"); | ||
| 169 | |||
| 151 | kfd_flush_tlb(qpd_to_pdd(qpd)); | 170 | kfd_flush_tlb(qpd_to_pdd(qpd)); |
| 152 | 171 | ||
| 153 | /* Release the vmid mapping */ | 172 | /* Release the vmid mapping */ |
| @@ -792,11 +811,12 @@ static void uninitialize(struct device_queue_manager *dqm) | |||
| 792 | static int start_nocpsch(struct device_queue_manager *dqm) | 811 | static int start_nocpsch(struct device_queue_manager *dqm) |
| 793 | { | 812 | { |
| 794 | init_interrupts(dqm); | 813 | init_interrupts(dqm); |
| 795 | return 0; | 814 | return pm_init(&dqm->packets, dqm); |
| 796 | } | 815 | } |
| 797 | 816 | ||
| 798 | static int stop_nocpsch(struct device_queue_manager *dqm) | 817 | static int stop_nocpsch(struct device_queue_manager *dqm) |
| 799 | { | 818 | { |
| 819 | pm_uninit(&dqm->packets); | ||
| 800 | return 0; | 820 | return 0; |
| 801 | } | 821 | } |
| 802 | 822 | ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 6fb9c0d46d63..4890a90f1e44 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c | |||
| @@ -52,6 +52,7 @@ struct kfd_event_waiter { | |||
| 52 | struct kfd_signal_page { | 52 | struct kfd_signal_page { |
| 53 | uint64_t *kernel_address; | 53 | uint64_t *kernel_address; |
| 54 | uint64_t __user *user_address; | 54 | uint64_t __user *user_address; |
| 55 | bool need_to_free_pages; | ||
| 55 | }; | 56 | }; |
| 56 | 57 | ||
| 57 | 58 | ||
| @@ -79,6 +80,7 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) | |||
| 79 | KFD_SIGNAL_EVENT_LIMIT * 8); | 80 | KFD_SIGNAL_EVENT_LIMIT * 8); |
| 80 | 81 | ||
| 81 | page->kernel_address = backing_store; | 82 | page->kernel_address = backing_store; |
| 83 | page->need_to_free_pages = true; | ||
| 82 | pr_debug("Allocated new event signal page at %p, for process %p\n", | 84 | pr_debug("Allocated new event signal page at %p, for process %p\n", |
| 83 | page, p); | 85 | page, p); |
| 84 | 86 | ||
| @@ -269,8 +271,9 @@ static void shutdown_signal_page(struct kfd_process *p) | |||
| 269 | struct kfd_signal_page *page = p->signal_page; | 271 | struct kfd_signal_page *page = p->signal_page; |
| 270 | 272 | ||
| 271 | if (page) { | 273 | if (page) { |
| 272 | free_pages((unsigned long)page->kernel_address, | 274 | if (page->need_to_free_pages) |
| 273 | get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); | 275 | free_pages((unsigned long)page->kernel_address, |
| 276 | get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); | ||
| 274 | kfree(page); | 277 | kfree(page); |
| 275 | } | 278 | } |
| 276 | } | 279 | } |
| @@ -292,6 +295,30 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev) | |||
| 292 | return ev->type == KFD_EVENT_TYPE_SIGNAL; | 295 | return ev->type == KFD_EVENT_TYPE_SIGNAL; |
| 293 | } | 296 | } |
| 294 | 297 | ||
| 298 | int kfd_event_page_set(struct kfd_process *p, void *kernel_address, | ||
| 299 | uint64_t size) | ||
| 300 | { | ||
| 301 | struct kfd_signal_page *page; | ||
| 302 | |||
| 303 | if (p->signal_page) | ||
| 304 | return -EBUSY; | ||
| 305 | |||
| 306 | page = kzalloc(sizeof(*page), GFP_KERNEL); | ||
| 307 | if (!page) | ||
| 308 | return -ENOMEM; | ||
| 309 | |||
| 310 | /* Initialize all events to unsignaled */ | ||
| 311 | memset(kernel_address, (uint8_t) UNSIGNALED_EVENT_SLOT, | ||
| 312 | KFD_SIGNAL_EVENT_LIMIT * 8); | ||
| 313 | |||
| 314 | page->kernel_address = kernel_address; | ||
| 315 | |||
| 316 | p->signal_page = page; | ||
| 317 | p->signal_mapped_size = size; | ||
| 318 | |||
| 319 | return 0; | ||
| 320 | } | ||
| 321 | |||
| 295 | int kfd_event_create(struct file *devkfd, struct kfd_process *p, | 322 | int kfd_event_create(struct file *devkfd, struct kfd_process *p, |
| 296 | uint32_t event_type, bool auto_reset, uint32_t node_id, | 323 | uint32_t event_type, bool auto_reset, uint32_t node_id, |
| 297 | uint32_t *event_id, uint32_t *event_trigger_data, | 324 | uint32_t *event_id, uint32_t *event_trigger_data, |
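The byte-wise memset in kfd_event_page_set only produces valid slots because the unsignaled slot value is a repeated-byte pattern. Assuming UNSIGNALED_EVENT_SLOT is all-ones (its definition is outside this hunk, so that is an assumption), a quick user-space check of the property:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
        uint64_t slot;

        /* (uint8_t)UINT64_MAX == 0xff, so every byte becomes 0xff */
        memset(&slot, (uint8_t)UINT64_MAX, sizeof(slot));
        assert(slot == UINT64_MAX);
        return 0;
    }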
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 7377513050e6..66852de410c8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | |||
| @@ -278,21 +278,28 @@ | |||
| 278 | #define MAKE_GPUVM_APP_BASE(gpu_num) \ | 278 | #define MAKE_GPUVM_APP_BASE(gpu_num) \ |
| 279 | (((uint64_t)(gpu_num) << 61) + 0x1000000000000L) | 279 | (((uint64_t)(gpu_num) << 61) + 0x1000000000000L) |
| 280 | 280 | ||
| 281 | #define MAKE_GPUVM_APP_LIMIT(base) \ | 281 | #define MAKE_GPUVM_APP_LIMIT(base, size) \ |
| 282 | (((uint64_t)(base) & \ | 282 | (((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1) |
| 283 | 0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL) | ||
| 284 | 283 | ||
| 285 | #define MAKE_SCRATCH_APP_BASE(gpu_num) \ | 284 | #define MAKE_SCRATCH_APP_BASE() \ |
| 286 | (((uint64_t)(gpu_num) << 61) + 0x100000000L) | 285 | (((uint64_t)(0x1UL) << 61) + 0x100000000L) |
| 287 | 286 | ||
| 288 | #define MAKE_SCRATCH_APP_LIMIT(base) \ | 287 | #define MAKE_SCRATCH_APP_LIMIT(base) \ |
| 289 | (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) | 288 | (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) |
| 290 | 289 | ||
| 291 | #define MAKE_LDS_APP_BASE(gpu_num) \ | 290 | #define MAKE_LDS_APP_BASE() \ |
| 292 | (((uint64_t)(gpu_num) << 61) + 0x0) | 291 | (((uint64_t)(0x1UL) << 61) + 0x0) |
| 293 | #define MAKE_LDS_APP_LIMIT(base) \ | 292 | #define MAKE_LDS_APP_LIMIT(base) \ |
| 294 | (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) | 293 | (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) |
| 295 | 294 | ||
| 295 | /* User mode manages most of the SVM aperture address space. The low | ||
| 296 | * 16MB are reserved for kernel use (CWSR trap handler and kernel IB | ||
| 297 | * for now). | ||
| 298 | */ | ||
| 299 | #define SVM_USER_BASE 0x1000000ull | ||
| 300 | #define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE) | ||
| 301 | #define SVM_IB_BASE (SVM_CWSR_BASE - PAGE_SIZE) | ||
| 302 | |||
| 296 | int kfd_init_apertures(struct kfd_process *process) | 303 | int kfd_init_apertures(struct kfd_process *process) |
| 297 | { | 304 | { |
| 298 | uint8_t id = 0; | 305 | uint8_t id = 0; |
| @@ -314,7 +321,7 @@ int kfd_init_apertures(struct kfd_process *process) | |||
| 314 | return -1; | 321 | return -1; |
| 315 | } | 322 | } |
| 316 | /* | 323 | /* |
| 317 | * For 64 bit process aperture will be statically reserved in | 324 | * For 64 bit process apertures will be statically reserved in |
| 318 | * the x86_64 non canonical process address space | 325 | * the x86_64 non canonical process address space |
| 319 | * amdkfd doesn't currently support apertures for 32 bit process | 326 | * amdkfd doesn't currently support apertures for 32 bit process |
| 320 | */ | 327 | */ |
| @@ -323,23 +330,35 @@ int kfd_init_apertures(struct kfd_process *process) | |||
| 323 | pdd->gpuvm_base = pdd->gpuvm_limit = 0; | 330 | pdd->gpuvm_base = pdd->gpuvm_limit = 0; |
| 324 | pdd->scratch_base = pdd->scratch_limit = 0; | 331 | pdd->scratch_base = pdd->scratch_limit = 0; |
| 325 | } else { | 332 | } else { |
| 326 | /* | 333 | /* Same LDS and scratch apertures can be used |
| 327 | * node id couldn't be 0 - the three MSB bits of | 334 | * on all GPUs. This allows supporting more dGPUs |
| 328 | * aperture shoudn't be 0 | 335 | * than there are aperture placement options. |
| 329 | */ | 336 | */ |
| 330 | pdd->lds_base = MAKE_LDS_APP_BASE(id + 1); | 337 | pdd->lds_base = MAKE_LDS_APP_BASE(); |
| 331 | |||
| 332 | pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); | 338 | pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); |
| 333 | 339 | ||
| 334 | pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1); | 340 | pdd->scratch_base = MAKE_SCRATCH_APP_BASE(); |
| 335 | |||
| 336 | pdd->gpuvm_limit = | ||
| 337 | MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base); | ||
| 338 | |||
| 339 | pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1); | ||
| 340 | |||
| 341 | pdd->scratch_limit = | 341 | pdd->scratch_limit = |
| 342 | MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); | 342 | MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); |
| 343 | |||
| 344 | if (dev->device_info->needs_iommu_device) { | ||
| 345 | /* APUs: GPUVM aperture in | ||
| 346 | * non-canonical address space | ||
| 347 | */ | ||
| 348 | pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1); | ||
| 349 | pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT( | ||
| 350 | pdd->gpuvm_base, | ||
| 351 | dev->shared_resources.gpuvm_size); | ||
| 352 | } else { | ||
| 353 | /* dGPUs: SVM aperture starting at 0 | ||
| 354 | * with small reserved space for kernel | ||
| 355 | */ | ||
| 356 | pdd->gpuvm_base = SVM_USER_BASE; | ||
| 357 | pdd->gpuvm_limit = | ||
| 358 | dev->shared_resources.gpuvm_size - 1; | ||
| 359 | pdd->qpd.cwsr_base = SVM_CWSR_BASE; | ||
| 360 | pdd->qpd.ib_base = SVM_IB_BASE; | ||
| 361 | } | ||
| 343 | } | 362 | } |
| 344 | 363 | ||
| 345 | dev_dbg(kfd_device, "node id %u\n", id); | 364 | dev_dbg(kfd_device, "node id %u\n", id); |
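The reserved low 16 MB of the dGPU SVM aperture is easiest to see with concrete numbers. A worked computation, assuming 4 KiB pages and a two-page KFD_CWSR_TBA_TMA_SIZE (both values are assumptions for illustration; the real constants live in kfd_priv.h):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t page_size = 4096;           /* assumed */
        const uint64_t cwsr_size = 2 * page_size;  /* assumed */
        const uint64_t user_base = 0x1000000;      /* 16 MiB */
        const uint64_t cwsr_base = user_base - cwsr_size;
        const uint64_t ib_base   = cwsr_base - page_size;

        printf("user=0x%llx cwsr=0x%llx ib=0x%llx\n",
               (unsigned long long)user_base,
               (unsigned long long)cwsr_base,
               (unsigned long long)ib_base);
        return 0;
    }

With those values the CWSR trap handler area ends exactly at SVM_USER_BASE and the single kernel IB page sits directly below it.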
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 65574c6a10b3..e0c07d24d251 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c | |||
| @@ -71,6 +71,11 @@ module_param(send_sigterm, int, 0444); | |||
| 71 | MODULE_PARM_DESC(send_sigterm, | 71 | MODULE_PARM_DESC(send_sigterm, |
| 72 | "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)"); | 72 | "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)"); |
| 73 | 73 | ||
| 74 | int debug_largebar; | ||
| 75 | module_param(debug_largebar, int, 0444); | ||
| 76 | MODULE_PARM_DESC(debug_largebar, | ||
| 77 | "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)"); | ||
| 78 | |||
| 74 | int ignore_crat; | 79 | int ignore_crat; |
| 75 | module_param(ignore_crat, int, 0444); | 80 | module_param(ignore_crat, int, 0444); |
| 76 | MODULE_PARM_DESC(ignore_crat, | 81 | MODULE_PARM_DESC(ignore_crat, |
| @@ -128,7 +133,9 @@ static int __init kfd_module_init(void) | |||
| 128 | if (err < 0) | 133 | if (err < 0) |
| 129 | goto err_topology; | 134 | goto err_topology; |
| 130 | 135 | ||
| 131 | kfd_process_create_wq(); | 136 | err = kfd_process_create_wq(); |
| 137 | if (err < 0) | ||
| 138 | goto err_create_wq; | ||
| 132 | 139 | ||
| 133 | kfd_debugfs_init(); | 140 | kfd_debugfs_init(); |
| 134 | 141 | ||
| @@ -138,6 +145,8 @@ static int __init kfd_module_init(void) | |||
| 138 | 145 | ||
| 139 | return 0; | 146 | return 0; |
| 140 | 147 | ||
| 148 | err_create_wq: | ||
| 149 | kfd_topology_shutdown(); | ||
| 141 | err_topology: | 150 | err_topology: |
| 142 | kfd_chardev_exit(); | 151 | kfd_chardev_exit(); |
| 143 | err_ioctl: | 152 | err_ioctl: |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 0ecbd1f9b606..7614375489a4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | |||
| @@ -356,6 +356,43 @@ static int pm_create_runlist_ib(struct packet_manager *pm, | |||
| 356 | return retval; | 356 | return retval; |
| 357 | } | 357 | } |
| 358 | 358 | ||
| 359 | /* pm_create_release_mem - Create a RELEASE_MEM packet and return the size | ||
| 360 | * of this packet | ||
| 361 | * @gpu_addr - GPU address of the packet. It's a virtual address. | ||
| 362 | * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer | ||
| 363 | * Return - length of the packet | ||
| 364 | */ | ||
| 365 | uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer) | ||
| 366 | { | ||
| 367 | struct pm4_mec_release_mem *packet; | ||
| 368 | |||
| 369 | WARN_ON(!buffer); | ||
| 370 | |||
| 371 | packet = (struct pm4_mec_release_mem *)buffer; | ||
| 372 | memset(buffer, 0, sizeof(*packet)); | ||
| 373 | |||
| 374 | packet->header.u32All = build_pm4_header(IT_RELEASE_MEM, | ||
| 375 | sizeof(*packet)); | ||
| 376 | |||
| 377 | packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; | ||
| 378 | packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; | ||
| 379 | packet->bitfields2.tcl1_action_ena = 1; | ||
| 380 | packet->bitfields2.tc_action_ena = 1; | ||
| 381 | packet->bitfields2.cache_policy = cache_policy___release_mem__lru; | ||
| 382 | packet->bitfields2.atc = 0; | ||
| 383 | |||
| 384 | packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low; | ||
| 385 | packet->bitfields3.int_sel = | ||
| 386 | int_sel___release_mem__send_interrupt_after_write_confirm; | ||
| 387 | |||
| 388 | packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; | ||
| 389 | packet->address_hi = upper_32_bits(gpu_addr); | ||
| 390 | |||
| 391 | packet->data_lo = 0; | ||
| 392 | |||
| 393 | return sizeof(*packet) / sizeof(unsigned int); | ||
| 394 | } | ||
| 395 | |||
| 359 | int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) | 396 | int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) |
| 360 | { | 397 | { |
| 361 | pm->dqm = dqm; | 398 | pm->dqm = dqm; |
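The address encoding in pm_create_release_mem is worth spelling out: the low 32 bits are stored in dword units (hence the >> 2, which assumes the write target is at least 4-byte aligned) and the high 32 bits go into address_hi. A worked example with an illustrative address:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Illustrative; happens to match the assumed SVM_IB_BASE above */
        uint64_t gpu_addr = 0xffd000ull;
        uint32_t address_lo_32b = (uint32_t)((gpu_addr & 0xffffffff) >> 2);
        uint32_t address_hi = (uint32_t)(gpu_addr >> 32);

        printf("lo=0x%x hi=0x%x\n", address_lo_32b, address_hi);
        return 0;
    }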
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index cac7aa258162..96a9cc0f02c9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h | |||
| @@ -105,6 +105,12 @@ extern int cwsr_enable; | |||
| 105 | extern int send_sigterm; | 105 | extern int send_sigterm; |
| 106 | 106 | ||
| 107 | /* | 107 | /* |
| 108 | * This module parameter is used to simulate large-BAR capability on | ||
| 109 | * machines that are not large-BAR enabled. | ||
| 110 | */ | ||
| 111 | extern int debug_largebar; | ||
| 112 | |||
| 113 | /* | ||
| 108 | * Ignore CRAT table during KFD initialization, can be used to work around | 114 | * Ignore CRAT table during KFD initialization, can be used to work around |
| 109 | * broken CRAT tables on some AMD systems | 115 | * broken CRAT tables on some AMD systems |
| 110 | */ | 116 | */ |
| @@ -488,8 +494,13 @@ struct qcm_process_device { | |||
| 488 | 494 | ||
| 489 | /* CWSR memory */ | 495 | /* CWSR memory */ |
| 490 | void *cwsr_kaddr; | 496 | void *cwsr_kaddr; |
| 497 | uint64_t cwsr_base; | ||
| 491 | uint64_t tba_addr; | 498 | uint64_t tba_addr; |
| 492 | uint64_t tma_addr; | 499 | uint64_t tma_addr; |
| 500 | |||
| 501 | /* IB memory */ | ||
| 502 | uint64_t ib_base; | ||
| 503 | void *ib_kaddr; | ||
| 493 | }; | 504 | }; |
| 494 | 505 | ||
| 495 | /* KFD Memory Eviction */ | 506 | /* KFD Memory Eviction */ |
| @@ -504,6 +515,14 @@ struct qcm_process_device { | |||
| 504 | int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, | 515 | int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, |
| 505 | struct dma_fence *fence); | 516 | struct dma_fence *fence); |
| 506 | 517 | ||
| 518 | /* 8 byte handle containing GPU ID in the most significant 4 bytes and | ||
| 519 | * idr_handle in the least significant 4 bytes | ||
| 520 | */ | ||
| 521 | #define MAKE_HANDLE(gpu_id, idr_handle) \ | ||
| 522 | (((uint64_t)(gpu_id) << 32) + idr_handle) | ||
| 523 | #define GET_GPU_ID(handle) (handle >> 32) | ||
| 524 | #define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF) | ||
| 525 | |||
| 507 | enum kfd_pdd_bound { | 526 | enum kfd_pdd_bound { |
| 508 | PDD_UNBOUND = 0, | 527 | PDD_UNBOUND = 0, |
| 509 | PDD_BOUND, | 528 | PDD_BOUND, |
| @@ -536,8 +555,12 @@ struct kfd_process_device { | |||
| 536 | uint64_t scratch_limit; | 555 | uint64_t scratch_limit; |
| 537 | 556 | ||
| 538 | /* VM context for GPUVM allocations */ | 557 | /* VM context for GPUVM allocations */ |
| 558 | struct file *drm_file; | ||
| 539 | void *vm; | 559 | void *vm; |
| 540 | 560 | ||
| 561 | /* GPUVM allocations storage */ | ||
| 562 | struct idr alloc_idr; | ||
| 563 | |||
| 541 | /* Flag used to tell the pdd has dequeued from the dqm. | 564 | /* Flag used to tell the pdd has dequeued from the dqm. |
| 542 | * This is used to prevent dev->dqm->ops.process_termination() from | 565 | * This is used to prevent dev->dqm->ops.process_termination() from |
| 543 | * being called twice when it is already called in IOMMU callback | 566 | * being called twice when it is already called in IOMMU callback |
| @@ -651,7 +674,7 @@ struct amdkfd_ioctl_desc { | |||
| 651 | const char *name; | 674 | const char *name; |
| 652 | }; | 675 | }; |
| 653 | 676 | ||
| 654 | void kfd_process_create_wq(void); | 677 | int kfd_process_create_wq(void); |
| 655 | void kfd_process_destroy_wq(void); | 678 | void kfd_process_destroy_wq(void); |
| 656 | struct kfd_process *kfd_create_process(struct file *filep); | 679 | struct kfd_process *kfd_create_process(struct file *filep); |
| 657 | struct kfd_process *kfd_get_process(const struct task_struct *); | 680 | struct kfd_process *kfd_get_process(const struct task_struct *); |
| @@ -661,6 +684,8 @@ void kfd_unref_process(struct kfd_process *p); | |||
| 661 | void kfd_suspend_all_processes(void); | 684 | void kfd_suspend_all_processes(void); |
| 662 | int kfd_resume_all_processes(void); | 685 | int kfd_resume_all_processes(void); |
| 663 | 686 | ||
| 687 | int kfd_process_device_init_vm(struct kfd_process_device *pdd, | ||
| 688 | struct file *drm_file); | ||
| 664 | struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, | 689 | struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, |
| 665 | struct kfd_process *p); | 690 | struct kfd_process *p); |
| 666 | struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, | 691 | struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, |
| @@ -671,6 +696,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, | |||
| 671 | int kfd_reserved_mem_mmap(struct kfd_process *process, | 696 | int kfd_reserved_mem_mmap(struct kfd_process *process, |
| 672 | struct vm_area_struct *vma); | 697 | struct vm_area_struct *vma); |
| 673 | 698 | ||
| 699 | /* KFD process API for creating and translating handles */ | ||
| 700 | int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd, | ||
| 701 | void *mem); | ||
| 702 | void *kfd_process_device_translate_handle(struct kfd_process_device *p, | ||
| 703 | int handle); | ||
| 704 | void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd, | ||
| 705 | int handle); | ||
| 706 | |||
| 674 | /* Process device data iterator */ | 707 | /* Process device data iterator */ |
| 675 | struct kfd_process_device *kfd_get_first_process_device_data( | 708 | struct kfd_process_device *kfd_get_first_process_device_data( |
| 676 | struct kfd_process *p); | 709 | struct kfd_process *p); |
| @@ -816,6 +849,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, | |||
| 816 | 849 | ||
| 817 | void pm_release_ib(struct packet_manager *pm); | 850 | void pm_release_ib(struct packet_manager *pm); |
| 818 | 851 | ||
| 852 | uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer); | ||
| 853 | |||
| 819 | uint64_t kfd_get_number_elems(struct kfd_dev *kfd); | 854 | uint64_t kfd_get_number_elems(struct kfd_dev *kfd); |
| 820 | 855 | ||
| 821 | /* Events */ | 856 | /* Events */ |
| @@ -837,6 +872,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, | |||
| 837 | void kfd_signal_hw_exception_event(unsigned int pasid); | 872 | void kfd_signal_hw_exception_event(unsigned int pasid); |
| 838 | int kfd_set_event(struct kfd_process *p, uint32_t event_id); | 873 | int kfd_set_event(struct kfd_process *p, uint32_t event_id); |
| 839 | int kfd_reset_event(struct kfd_process *p, uint32_t event_id); | 874 | int kfd_reset_event(struct kfd_process *p, uint32_t event_id); |
| 875 | int kfd_event_page_set(struct kfd_process *p, void *kernel_address, | ||
| 876 | uint64_t size); | ||
| 840 | int kfd_event_create(struct file *devkfd, struct kfd_process *p, | 877 | int kfd_event_create(struct file *devkfd, struct kfd_process *p, |
| 841 | uint32_t event_type, bool auto_reset, uint32_t node_id, | 878 | uint32_t event_type, bool auto_reset, uint32_t node_id, |
| 842 | uint32_t *event_id, uint32_t *event_trigger_data, | 879 | uint32_t *event_id, uint32_t *event_trigger_data, |
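The new 64-bit buffer handle is a plain bit-pack, GPU ID high and IDR handle low, which is why the chardev code above can split args->handle with GET_GPU_ID/GET_IDR_HANDLE. A round-trip check of the encoding exactly as defined here:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t gpu_id = 0x2902, idr_handle = 42;    /* sample values */
        uint64_t handle = ((uint64_t)gpu_id << 32) + idr_handle;

        assert((handle >> 32) == gpu_id);             /* GET_GPU_ID */
        assert((handle & 0xFFFFFFFF) == idr_handle);  /* GET_IDR_HANDLE */
        return 0;
    }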
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 18b2b86ad503..1711ad0642f7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <linux/notifier.h> | 30 | #include <linux/notifier.h> |
| 31 | #include <linux/compat.h> | 31 | #include <linux/compat.h> |
| 32 | #include <linux/mman.h> | 32 | #include <linux/mman.h> |
| 33 | #include <linux/file.h> | ||
| 33 | 34 | ||
| 34 | struct mm_struct; | 35 | struct mm_struct; |
| 35 | 36 | ||
| @@ -47,22 +48,39 @@ static DEFINE_MUTEX(kfd_processes_mutex); | |||
| 47 | 48 | ||
| 48 | DEFINE_SRCU(kfd_processes_srcu); | 49 | DEFINE_SRCU(kfd_processes_srcu); |
| 49 | 50 | ||
| 51 | /* For process termination handling */ | ||
| 50 | static struct workqueue_struct *kfd_process_wq; | 52 | static struct workqueue_struct *kfd_process_wq; |
| 51 | 53 | ||
| 54 | /* Ordered, single-threaded workqueue for restoring evicted | ||
| 55 | * processes. Restoring multiple processes concurrently under memory | ||
| 56 | * pressure can lead to processes blocking each other from validating | ||
| 57 | * their BOs and result in a live-lock situation where processes | ||
| 58 | * remain evicted indefinitely. | ||
| 59 | */ | ||
| 60 | static struct workqueue_struct *kfd_restore_wq; | ||
| 61 | |||
| 52 | static struct kfd_process *find_process(const struct task_struct *thread); | 62 | static struct kfd_process *find_process(const struct task_struct *thread); |
| 53 | static void kfd_process_ref_release(struct kref *ref); | 63 | static void kfd_process_ref_release(struct kref *ref); |
| 54 | static struct kfd_process *create_process(const struct task_struct *thread, | 64 | static struct kfd_process *create_process(const struct task_struct *thread, |
| 55 | struct file *filep); | 65 | struct file *filep); |
| 56 | static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep); | ||
| 57 | 66 | ||
| 58 | static void evict_process_worker(struct work_struct *work); | 67 | static void evict_process_worker(struct work_struct *work); |
| 59 | static void restore_process_worker(struct work_struct *work); | 68 | static void restore_process_worker(struct work_struct *work); |
| 60 | 69 | ||
| 61 | 70 | ||
| 62 | void kfd_process_create_wq(void) | 71 | int kfd_process_create_wq(void) |
| 63 | { | 72 | { |
| 64 | if (!kfd_process_wq) | 73 | if (!kfd_process_wq) |
| 65 | kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0); | 74 | kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0); |
| 75 | if (!kfd_restore_wq) | ||
| 76 | kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0); | ||
| 77 | |||
| 78 | if (!kfd_process_wq || !kfd_restore_wq) { | ||
| 79 | kfd_process_destroy_wq(); | ||
| 80 | return -ENOMEM; | ||
| 81 | } | ||
| 82 | |||
| 83 | return 0; | ||
| 66 | } | 84 | } |
| 67 | 85 | ||
| 68 | void kfd_process_destroy_wq(void) | 86 | void kfd_process_destroy_wq(void) |
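The live-lock fix rests on one property of alloc_ordered_workqueue(): an ordered workqueue executes at most one work item at a time, so two queued restore works can never compete for BO validation. A minimal kernel-style sketch of the pattern (illustrative module code, not part of the patch):

    #include <linux/workqueue.h>

    static struct workqueue_struct *example_restore_wq;

    /* Ordered: max_active is 1, queued works run strictly one at a time */
    static int example_wq_init(void)
    {
        example_restore_wq = alloc_ordered_workqueue("example_restore_wq", 0);

        return example_restore_wq ? 0 : -ENOMEM;
    }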
| @@ -71,6 +89,116 @@ void kfd_process_destroy_wq(void) | |||
| 71 | destroy_workqueue(kfd_process_wq); | 89 | destroy_workqueue(kfd_process_wq); |
| 72 | kfd_process_wq = NULL; | 90 | kfd_process_wq = NULL; |
| 73 | } | 91 | } |
| 92 | if (kfd_restore_wq) { | ||
| 93 | destroy_workqueue(kfd_restore_wq); | ||
| 94 | kfd_restore_wq = NULL; | ||
| 95 | } | ||
| 96 | } | ||
| 97 | |||
| 98 | static void kfd_process_free_gpuvm(struct kgd_mem *mem, | ||
| 99 | struct kfd_process_device *pdd) | ||
| 100 | { | ||
| 101 | struct kfd_dev *dev = pdd->dev; | ||
| 102 | |||
| 103 | dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd, mem, pdd->vm); | ||
| 104 | dev->kfd2kgd->free_memory_of_gpu(dev->kgd, mem); | ||
| 105 | } | ||
| 106 | |||
| 107 | /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process | ||
| 108 | * This function should only be called right after the process | ||
| 109 | * is created, while kfd_processes_mutex is still being held | ||
| 110 | * to avoid concurrency. Because of that exclusivity, we do | ||
| 111 | * not need to take p->mutex. | ||
| 112 | */ | ||
| 113 | static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, | ||
| 114 | uint64_t gpu_va, uint32_t size, | ||
| 115 | uint32_t flags, void **kptr) | ||
| 116 | { | ||
| 117 | struct kfd_dev *kdev = pdd->dev; | ||
| 118 | struct kgd_mem *mem = NULL; | ||
| 119 | int handle; | ||
| 120 | int err; | ||
| 121 | |||
| 122 | err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size, | ||
| 123 | pdd->vm, &mem, NULL, flags); | ||
| 124 | if (err) | ||
| 125 | goto err_alloc_mem; | ||
| 126 | |||
| 127 | err = kdev->kfd2kgd->map_memory_to_gpu(kdev->kgd, mem, pdd->vm); | ||
| 128 | if (err) | ||
| 129 | goto err_map_mem; | ||
| 130 | |||
| 131 | err = kdev->kfd2kgd->sync_memory(kdev->kgd, mem, true); | ||
| 132 | if (err) { | ||
| 133 | pr_debug("Sync memory failed, wait interrupted by user signal\n"); | ||
| 134 | goto sync_memory_failed; | ||
| 135 | } | ||
| 136 | |||
| 137 | /* Create an obj handle so kfd_process_device_remove_obj_handle | ||
| 138 | * will take care of the bo removal when the process finishes. | ||
| 139 | * We do not need to take p->mutex, because the process is just | ||
| 140 | * created and the ioctls have not had the chance to run. | ||
| 141 | */ | ||
| 142 | handle = kfd_process_device_create_obj_handle(pdd, mem); | ||
| 143 | |||
| 144 | if (handle < 0) { | ||
| 145 | err = handle; | ||
| 146 | goto free_gpuvm; | ||
| 147 | } | ||
| 148 | |||
| 149 | if (kptr) { | ||
| 150 | err = kdev->kfd2kgd->map_gtt_bo_to_kernel(kdev->kgd, | ||
| 151 | (struct kgd_mem *)mem, kptr, NULL); | ||
| 152 | if (err) { | ||
| 153 | pr_debug("Map GTT BO to kernel failed\n"); | ||
| 154 | goto free_obj_handle; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | return err; | ||
| 159 | |||
| 160 | free_obj_handle: | ||
| 161 | kfd_process_device_remove_obj_handle(pdd, handle); | ||
| 162 | free_gpuvm: | ||
| 163 | sync_memory_failed: | ||
| 164 | kfd_process_free_gpuvm(mem, pdd); | ||
| 165 | return err; | ||
| 166 | |||
| 167 | err_map_mem: | ||
| 168 | kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem); | ||
| 169 | err_alloc_mem: | ||
| 170 | *kptr = NULL; | ||
| 171 | return err; | ||
| 172 | } | ||
| 173 | |||
| 174 | /* kfd_process_device_reserve_ib_mem - Reserve memory inside the | ||
| 175 | * process for IB usage. The memory reserved is for KFD to submit | ||
| 176 | * IBs to AMDGPU from the kernel. If the memory is reserved | ||
| 177 | * successfully, ib_kaddr will hold the CPU/kernel | ||
| 178 | * address. Check ib_kaddr before accessing the memory. | ||
| 179 | */ | ||
| 180 | static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd) | ||
| 181 | { | ||
| 182 | struct qcm_process_device *qpd = &pdd->qpd; | ||
| 183 | uint32_t flags = ALLOC_MEM_FLAGS_GTT | | ||
| 184 | ALLOC_MEM_FLAGS_NO_SUBSTITUTE | | ||
| 185 | ALLOC_MEM_FLAGS_WRITABLE | | ||
| 186 | ALLOC_MEM_FLAGS_EXECUTABLE; | ||
| 187 | void *kaddr; | ||
| 188 | int ret; | ||
| 189 | |||
| 190 | if (qpd->ib_kaddr || !qpd->ib_base) | ||
| 191 | return 0; | ||
| 192 | |||
| 193 | /* ib_base is only set for dGPU */ | ||
| 194 | ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags, | ||
| 195 | &kaddr); | ||
| 196 | if (ret) | ||
| 197 | return ret; | ||
| 198 | |||
| 199 | qpd->ib_kaddr = kaddr; | ||
| 200 | |||
| 201 | return 0; | ||
| 74 | } | 202 | } |
| 75 | 203 | ||
| 76 | struct kfd_process *kfd_create_process(struct file *filep) | 204 | struct kfd_process *kfd_create_process(struct file *filep) |
| @@ -149,6 +277,40 @@ void kfd_unref_process(struct kfd_process *p) | |||
| 149 | kref_put(&p->ref, kfd_process_ref_release); | 277 | kref_put(&p->ref, kfd_process_ref_release); |
| 150 | } | 278 | } |
| 151 | 279 | ||
| 280 | static void kfd_process_device_free_bos(struct kfd_process_device *pdd) | ||
| 281 | { | ||
| 282 | struct kfd_process *p = pdd->process; | ||
| 283 | void *mem; | ||
| 284 | int id; | ||
| 285 | |||
| 286 | /* | ||
| 287 | * Remove all handles from idr and release appropriate | ||
| 288 | * local memory object | ||
| 289 | */ | ||
| 290 | idr_for_each_entry(&pdd->alloc_idr, mem, id) { | ||
| 291 | struct kfd_process_device *peer_pdd; | ||
| 292 | |||
| 293 | list_for_each_entry(peer_pdd, &p->per_device_data, | ||
| 294 | per_device_list) { | ||
| 295 | if (!peer_pdd->vm) | ||
| 296 | continue; | ||
| 297 | peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu( | ||
| 298 | peer_pdd->dev->kgd, mem, peer_pdd->vm); | ||
| 299 | } | ||
| 300 | |||
| 301 | pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd, mem); | ||
| 302 | kfd_process_device_remove_obj_handle(pdd, id); | ||
| 303 | } | ||
| 304 | } | ||
| 305 | |||
| 306 | static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p) | ||
| 307 | { | ||
| 308 | struct kfd_process_device *pdd; | ||
| 309 | |||
| 310 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) | ||
| 311 | kfd_process_device_free_bos(pdd); | ||
| 312 | } | ||
| 313 | |||
| 152 | static void kfd_process_destroy_pdds(struct kfd_process *p) | 314 | static void kfd_process_destroy_pdds(struct kfd_process *p) |
| 153 | { | 315 | { |
| 154 | struct kfd_process_device *pdd, *temp; | 316 | struct kfd_process_device *pdd, *temp; |
| @@ -158,16 +320,20 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) | |||
| 158 | pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n", | 320 | pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n", |
| 159 | pdd->dev->id, p->pasid); | 321 | pdd->dev->id, p->pasid); |
| 160 | 322 | ||
| 161 | if (pdd->vm) | 323 | if (pdd->drm_file) |
| 324 | fput(pdd->drm_file); | ||
| 325 | else if (pdd->vm) | ||
| 162 | pdd->dev->kfd2kgd->destroy_process_vm( | 326 | pdd->dev->kfd2kgd->destroy_process_vm( |
| 163 | pdd->dev->kgd, pdd->vm); | 327 | pdd->dev->kgd, pdd->vm); |
| 164 | 328 | ||
| 165 | list_del(&pdd->per_device_list); | 329 | list_del(&pdd->per_device_list); |
| 166 | 330 | ||
| 167 | if (pdd->qpd.cwsr_kaddr) | 331 | if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base) |
| 168 | free_pages((unsigned long)pdd->qpd.cwsr_kaddr, | 332 | free_pages((unsigned long)pdd->qpd.cwsr_kaddr, |
| 169 | get_order(KFD_CWSR_TBA_TMA_SIZE)); | 333 | get_order(KFD_CWSR_TBA_TMA_SIZE)); |
| 170 | 334 | ||
| 335 | idr_destroy(&pdd->alloc_idr); | ||
| 336 | |||
| 171 | kfree(pdd); | 337 | kfree(pdd); |
| 172 | } | 338 | } |
| 173 | } | 339 | } |
| @@ -184,6 +350,8 @@ static void kfd_process_wq_release(struct work_struct *work) | |||
| 184 | 350 | ||
| 185 | kfd_iommu_unbind_process(p); | 351 | kfd_iommu_unbind_process(p); |
| 186 | 352 | ||
| 353 | kfd_process_free_outstanding_kfd_bos(p); | ||
| 354 | |||
| 187 | kfd_process_destroy_pdds(p); | 355 | kfd_process_destroy_pdds(p); |
| 188 | dma_fence_put(p->ef); | 356 | dma_fence_put(p->ef); |
| 189 | 357 | ||
| @@ -271,18 +439,18 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { | |||
| 271 | .release = kfd_process_notifier_release, | 439 | .release = kfd_process_notifier_release, |
| 272 | }; | 440 | }; |
| 273 | 441 | ||
| 274 | static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) | 442 | static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) |
| 275 | { | 443 | { |
| 276 | unsigned long offset; | 444 | unsigned long offset; |
| 277 | struct kfd_process_device *pdd = NULL; | 445 | struct kfd_process_device *pdd; |
| 278 | struct kfd_dev *dev = NULL; | ||
| 279 | struct qcm_process_device *qpd = NULL; | ||
| 280 | 446 | ||
| 281 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) { | 447 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) { |
| 282 | dev = pdd->dev; | 448 | struct kfd_dev *dev = pdd->dev; |
| 283 | qpd = &pdd->qpd; | 449 | struct qcm_process_device *qpd = &pdd->qpd; |
| 284 | if (!dev->cwsr_enabled || qpd->cwsr_kaddr) | 450 | |
| 451 | if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) | ||
| 285 | continue; | 452 | continue; |
| 453 | |||
| 286 | offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT; | 454 | offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT; |
| 287 | qpd->tba_addr = (int64_t)vm_mmap(filep, 0, | 455 | qpd->tba_addr = (int64_t)vm_mmap(filep, 0, |
| 288 | KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, | 456 | KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, |
| @@ -307,6 +475,36 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) | |||
| 307 | return 0; | 475 | return 0; |
| 308 | } | 476 | } |
| 309 | 477 | ||
| 478 | static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) | ||
| 479 | { | ||
| 480 | struct kfd_dev *dev = pdd->dev; | ||
| 481 | struct qcm_process_device *qpd = &pdd->qpd; | ||
| 482 | uint32_t flags = ALLOC_MEM_FLAGS_GTT | | ||
| 483 | ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE; | ||
| 484 | void *kaddr; | ||
| 485 | int ret; | ||
| 486 | |||
| 487 | if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base) | ||
| 488 | return 0; | ||
| 489 | |||
| 490 | /* cwsr_base is only set for dGPU */ | ||
| 491 | ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base, | ||
| 492 | KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr); | ||
| 493 | if (ret) | ||
| 494 | return ret; | ||
| 495 | |||
| 496 | qpd->cwsr_kaddr = kaddr; | ||
| 497 | qpd->tba_addr = qpd->cwsr_base; | ||
| 498 | |||
| 499 | memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); | ||
| 500 | |||
| 501 | qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; | ||
| 502 | pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", | ||
| 503 | qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); | ||
| 504 | |||
| 505 | return 0; | ||
| 506 | } | ||
| 507 | |||
| 310 | static struct kfd_process *create_process(const struct task_struct *thread, | 508 | static struct kfd_process *create_process(const struct task_struct *thread, |
| 311 | struct file *filep) | 509 | struct file *filep) |
| 312 | { | 510 | { |
| @@ -361,13 +559,14 @@ static struct kfd_process *create_process(const struct task_struct *thread, | |||
| 361 | INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); | 559 | INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); |
| 362 | process->last_restore_timestamp = get_jiffies_64(); | 560 | process->last_restore_timestamp = get_jiffies_64(); |
| 363 | 561 | ||
| 364 | err = kfd_process_init_cwsr(process, filep); | 562 | err = kfd_process_init_cwsr_apu(process, filep); |
| 365 | if (err) | 563 | if (err) |
| 366 | goto err_init_cwsr; | 564 | goto err_init_cwsr; |
| 367 | 565 | ||
| 368 | return process; | 566 | return process; |
| 369 | 567 | ||
| 370 | err_init_cwsr: | 568 | err_init_cwsr: |
| 569 | kfd_process_free_outstanding_kfd_bos(process); | ||
| 371 | kfd_process_destroy_pdds(process); | 570 | kfd_process_destroy_pdds(process); |
| 372 | err_init_apertures: | 571 | err_init_apertures: |
| 373 | pqm_uninit(&process->pqm); | 572 | pqm_uninit(&process->pqm); |
| @@ -418,18 +617,70 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, | |||
| 418 | pdd->already_dequeued = false; | 617 | pdd->already_dequeued = false; |
| 419 | list_add(&pdd->per_device_list, &p->per_device_data); | 618 | list_add(&pdd->per_device_list, &p->per_device_data); |
| 420 | 619 | ||
| 421 | /* Create the GPUVM context for this specific device */ | 620 | /* Init idr used for memory handle translation */ |
| 422 | if (dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm, | 621 | idr_init(&pdd->alloc_idr); |
| 423 | &p->kgd_process_info, &p->ef)) { | 622 | |
| 623 | return pdd; | ||
| 624 | } | ||
| 625 | |||
| 626 | /** | ||
| 627 | * kfd_process_device_init_vm - Initialize a VM for a process-device | ||
| 628 | * | ||
| 629 | * @pdd: The process-device | ||
| 630 | * @drm_file: Optional pointer to a DRM file descriptor | ||
| 631 | * | ||
| 632 | * If @drm_file is specified, it will be used to acquire the VM from | ||
| 633 | * that file descriptor. If successful, the @pdd takes ownership of | ||
| 634 | * the file descriptor. | ||
| 635 | * | ||
| 636 | * If @drm_file is NULL, a new VM is created. | ||
| 637 | * | ||
| 638 | * Returns 0 on success, -errno on failure. | ||
| 639 | */ | ||
| 640 | int kfd_process_device_init_vm(struct kfd_process_device *pdd, | ||
| 641 | struct file *drm_file) | ||
| 642 | { | ||
| 643 | struct kfd_process *p; | ||
| 644 | struct kfd_dev *dev; | ||
| 645 | int ret; | ||
| 646 | |||
| 647 | if (pdd->vm) | ||
| 648 | return drm_file ? -EBUSY : 0; | ||
| 649 | |||
| 650 | p = pdd->process; | ||
| 651 | dev = pdd->dev; | ||
| 652 | |||
| 653 | if (drm_file) | ||
| 654 | ret = dev->kfd2kgd->acquire_process_vm( | ||
| 655 | dev->kgd, drm_file, | ||
| 656 | &pdd->vm, &p->kgd_process_info, &p->ef); | ||
| 657 | else | ||
| 658 | ret = dev->kfd2kgd->create_process_vm( | ||
| 659 | dev->kgd, &pdd->vm, &p->kgd_process_info, &p->ef); | ||
| 660 | if (ret) { | ||
| 424 | pr_err("Failed to create process VM object\n"); | 661 | pr_err("Failed to create process VM object\n"); |
| 425 | goto err_create_pdd; | 662 | return ret; |
| 426 | } | 663 | } |
| 427 | return pdd; | ||
| 428 | 664 | ||
| 429 | err_create_pdd: | 665 | ret = kfd_process_device_reserve_ib_mem(pdd); |
| 430 | list_del(&pdd->per_device_list); | 666 | if (ret) |
| 431 | kfree(pdd); | 667 | goto err_reserve_ib_mem; |
| 432 | return NULL; | 668 | ret = kfd_process_device_init_cwsr_dgpu(pdd); |
| 669 | if (ret) | ||
| 670 | goto err_init_cwsr; | ||
| 671 | |||
| 672 | pdd->drm_file = drm_file; | ||
| 673 | |||
| 674 | return 0; | ||
| 675 | |||
| 676 | err_init_cwsr: | ||
| 677 | err_reserve_ib_mem: | ||
| 678 | kfd_process_device_free_bos(pdd); | ||
| 679 | if (!drm_file) | ||
| 680 | dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm); | ||
| 681 | pdd->vm = NULL; | ||
| 682 | |||
| 683 | return ret; | ||
| 433 | } | 684 | } |
| 434 | 685 | ||
| 435 | /* | 686 | /* |
| @@ -455,6 +706,10 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, | |||
| 455 | if (err) | 706 | if (err) |
| 456 | return ERR_PTR(err); | 707 | return ERR_PTR(err); |
| 457 | 708 | ||
| 709 | err = kfd_process_device_init_vm(pdd, NULL); | ||
| 710 | if (err) | ||
| 711 | return ERR_PTR(err); | ||
| 712 | |||
| 458 | return pdd; | 713 | return pdd; |
| 459 | } | 714 | } |
| 460 | 715 | ||
| @@ -480,6 +735,37 @@ bool kfd_has_process_device_data(struct kfd_process *p) | |||
| 480 | return !(list_empty(&p->per_device_data)); | 735 | return !(list_empty(&p->per_device_data)); |
| 481 | } | 736 | } |
| 482 | 737 | ||
| 738 | /* Create specific handle mapped to mem from process local memory idr | ||
| 739 | * Assumes that the process lock is held. | ||
| 740 | */ | ||
| 741 | int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd, | ||
| 742 | void *mem) | ||
| 743 | { | ||
| 744 | return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL); | ||
| 745 | } | ||
| 746 | |||
| 747 | /* Translate specific handle from process local memory idr | ||
| 748 | * Assumes that the process lock is held. | ||
| 749 | */ | ||
| 750 | void *kfd_process_device_translate_handle(struct kfd_process_device *pdd, | ||
| 751 | int handle) | ||
| 752 | { | ||
| 753 | if (handle < 0) | ||
| 754 | return NULL; | ||
| 755 | |||
| 756 | return idr_find(&pdd->alloc_idr, handle); | ||
| 757 | } | ||
| 758 | |||
| 759 | /* Remove specific handle from process local memory idr | ||
| 760 | * Assumes that the process lock is held. | ||
| 761 | */ | ||
| 762 | void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd, | ||
| 763 | int handle) | ||
| 764 | { | ||
| 765 | if (handle >= 0) | ||
| 766 | idr_remove(&pdd->alloc_idr, handle); | ||
| 767 | } | ||
| 768 | |||
| 483 | /* This increments the process->ref counter. */ | 769 | /* This increments the process->ref counter. */ |
| 484 | struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) | 770 | struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) |
| 485 | { | 771 | { |
| @@ -605,7 +891,7 @@ static void evict_process_worker(struct work_struct *work) | |||
| 605 | dma_fence_signal(p->ef); | 891 | dma_fence_signal(p->ef); |
| 606 | dma_fence_put(p->ef); | 892 | dma_fence_put(p->ef); |
| 607 | p->ef = NULL; | 893 | p->ef = NULL; |
| 608 | schedule_delayed_work(&p->restore_work, | 894 | queue_delayed_work(kfd_restore_wq, &p->restore_work, |
| 609 | msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); | 895 | msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); |
| 610 | 896 | ||
| 611 | pr_debug("Finished evicting pasid %d\n", p->pasid); | 897 | pr_debug("Finished evicting pasid %d\n", p->pasid); |
| @@ -654,7 +940,7 @@ static void restore_process_worker(struct work_struct *work) | |||
| 654 | if (ret) { | 940 | if (ret) { |
| 655 | pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n", | 941 | pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n", |
| 656 | p->pasid, PROCESS_BACK_OFF_TIME_MS); | 942 | p->pasid, PROCESS_BACK_OFF_TIME_MS); |
| 657 | ret = schedule_delayed_work(&p->restore_work, | 943 | ret = queue_delayed_work(kfd_restore_wq, &p->restore_work, |
| 658 | msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS)); | 944 | msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS)); |
| 659 | WARN(!ret, "reschedule restore work failed\n"); | 945 | WARN(!ret, "reschedule restore work failed\n"); |
| 660 | return; | 946 | return; |
| @@ -693,7 +979,7 @@ int kfd_resume_all_processes(void) | |||
| 693 | int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu); | 979 | int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu); |
| 694 | 980 | ||
| 695 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { | 981 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { |
| 696 | if (!schedule_delayed_work(&p->restore_work, 0)) { | 982 | if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) { |
| 697 | pr_err("Restore process %d failed during resume\n", | 983 | pr_err("Restore process %d failed during resume\n", |
| 698 | p->pasid); | 984 | p->pasid); |
| 699 | ret = -EFAULT; | 985 | ret = -EFAULT; |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 250615535563..ac28abc94e57 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c | |||
| @@ -441,6 +441,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, | |||
| 441 | dev->node_props.device_id); | 441 | dev->node_props.device_id); |
| 442 | sysfs_show_32bit_prop(buffer, "location_id", | 442 | sysfs_show_32bit_prop(buffer, "location_id", |
| 443 | dev->node_props.location_id); | 443 | dev->node_props.location_id); |
| 444 | sysfs_show_32bit_prop(buffer, "drm_render_minor", | ||
| 445 | dev->node_props.drm_render_minor); | ||
| 444 | 446 | ||
| 445 | if (dev->gpu) { | 447 | if (dev->gpu) { |
| 446 | log_max_watch_addr = | 448 | log_max_watch_addr = |
| @@ -1214,6 +1216,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) | |||
| 1214 | dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd); | 1216 | dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd); |
| 1215 | dev->node_props.max_engine_clk_ccompute = | 1217 | dev->node_props.max_engine_clk_ccompute = |
| 1216 | cpufreq_quick_get_max(0) / 1000; | 1218 | cpufreq_quick_get_max(0) / 1000; |
| 1219 | dev->node_props.drm_render_minor = | ||
| 1220 | gpu->shared_resources.drm_render_minor; | ||
| 1217 | 1221 | ||
| 1218 | kfd_fill_mem_clk_max_info(dev); | 1222 | kfd_fill_mem_clk_max_info(dev); |
| 1219 | kfd_fill_iolink_non_crat_info(dev); | 1223 | kfd_fill_iolink_non_crat_info(dev); |
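Exposing drm_render_minor in topology is what lets user mode find the render node to hand to AMDKFD_IOC_ACQUIRE_VM (see the args struct in kfd_ioctl.h below). A hypothetical user-space flow, with the sysfs parsing elided and error handling kept minimal:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kfd_ioctl.h>

    /* drm_render_minor is read from the node's topology properties
     * (parsing not shown); the render node fd is then shared with KFD
     * so both drivers use the same GPUVM. */
    static int kfd_acquire_vm_for_node(int kfd_fd, uint32_t gpu_id,
                                       int drm_render_minor)
    {
        struct kfd_ioctl_acquire_vm_args args;
        char path[64];
        int drm_fd;

        snprintf(path, sizeof(path), "/dev/dri/renderD%d", drm_render_minor);
        drm_fd = open(path, O_RDWR);
        if (drm_fd < 0)
            return -1;

        args.drm_fd = drm_fd;
        args.gpu_id = gpu_id;

        return ioctl(kfd_fd, AMDKFD_IOC_ACQUIRE_VM, &args);
    }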
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index c0be2be6dca5..eb54cfcaf039 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h | |||
| @@ -71,6 +71,7 @@ struct kfd_node_properties { | |||
| 71 | uint32_t location_id; | 71 | uint32_t location_id; |
| 72 | uint32_t max_engine_clk_fcompute; | 72 | uint32_t max_engine_clk_fcompute; |
| 73 | uint32_t max_engine_clk_ccompute; | 73 | uint32_t max_engine_clk_ccompute; |
| 74 | int32_t drm_render_minor; | ||
| 74 | uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; | 75 | uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; |
| 75 | }; | 76 | }; |
| 76 | 77 | ||
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 1e5c22ceb256..237289a72bb7 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h | |||
| @@ -130,6 +130,7 @@ struct tile_config { | |||
| 130 | 130 | ||
| 131 | /* | 131 | /* |
| 132 | * Allocation flag domains | 132 | * Allocation flag domains |
| 133 | * NOTE: This must match the corresponding definitions in kfd_ioctl.h. | ||
| 133 | */ | 134 | */ |
| 134 | #define ALLOC_MEM_FLAGS_VRAM (1 << 0) | 135 | #define ALLOC_MEM_FLAGS_VRAM (1 << 0) |
| 135 | #define ALLOC_MEM_FLAGS_GTT (1 << 1) | 136 | #define ALLOC_MEM_FLAGS_GTT (1 << 1) |
| @@ -138,6 +139,7 @@ struct tile_config { | |||
| 138 | 139 | ||
| 139 | /* | 140 | /* |
| 140 | * Allocation flags attributes/access options. | 141 | * Allocation flags attributes/access options. |
| 142 | * NOTE: This must match the corresponding definitions in kfd_ioctl.h. | ||
| 141 | */ | 143 | */ |
| 142 | #define ALLOC_MEM_FLAGS_WRITABLE (1 << 31) | 144 | #define ALLOC_MEM_FLAGS_WRITABLE (1 << 31) |
| 143 | #define ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) | 145 | #define ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) |
| @@ -336,6 +338,8 @@ struct kfd2kgd_calls { | |||
| 336 | 338 | ||
| 337 | int (*create_process_vm)(struct kgd_dev *kgd, void **vm, | 339 | int (*create_process_vm)(struct kgd_dev *kgd, void **vm, |
| 338 | void **process_info, struct dma_fence **ef); | 340 | void **process_info, struct dma_fence **ef); |
| 341 | int (*acquire_process_vm)(struct kgd_dev *kgd, struct file *filp, | ||
| 342 | void **vm, void **process_info, struct dma_fence **ef); | ||
| 339 | void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm); | 343 | void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm); |
| 340 | uint32_t (*get_process_page_dir)(void *vm); | 344 | uint32_t (*get_process_page_dir)(void *vm); |
| 341 | void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, | 345 | void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, |
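The two "must match" notes are load-bearing: the kgd2kfd allocation flags and the new KFD_IOC_ALLOC_MEM_FLAGS_* uapi bits appear to be passed through without translation. One way such an invariant could be pinned down at build time (not part of the patch; the two values are copied from the hunks above and below):

    #include <assert.h>

    #define ALLOC_MEM_FLAGS_GTT         (1 << 1)  /* kgd_kfd_interface.h */
    #define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1)  /* kfd_ioctl.h */

    static_assert(ALLOC_MEM_FLAGS_GTT == KFD_IOC_ALLOC_MEM_FLAGS_GTT,
                  "kgd2kfd and uapi GTT flags must stay in sync");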
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 111d73ba2d96..b4f5073dbac2 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h | |||
| @@ -107,8 +107,6 @@ struct kfd_ioctl_get_clock_counters_args { | |||
| 107 | __u32 pad; | 107 | __u32 pad; |
| 108 | }; | 108 | }; |
| 109 | 109 | ||
| 110 | #define NUM_OF_SUPPORTED_GPUS 7 | ||
| 111 | |||
| 112 | struct kfd_process_device_apertures { | 110 | struct kfd_process_device_apertures { |
| 113 | __u64 lds_base; /* from KFD */ | 111 | __u64 lds_base; /* from KFD */ |
| 114 | __u64 lds_limit; /* from KFD */ | 112 | __u64 lds_limit; /* from KFD */ |
| @@ -120,6 +118,12 @@ struct kfd_process_device_apertures { | |||
| 120 | __u32 pad; | 118 | __u32 pad; |
| 121 | }; | 119 | }; |
| 122 | 120 | ||
| 121 | /* | ||
| 122 | * AMDKFD_IOC_GET_PROCESS_APERTURES is deprecated. Use | ||
| 123 | * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW instead, which supports an | ||
| 124 | * unlimited number of GPUs. | ||
| 125 | */ | ||
| 126 | #define NUM_OF_SUPPORTED_GPUS 7 | ||
| 123 | struct kfd_ioctl_get_process_apertures_args { | 127 | struct kfd_ioctl_get_process_apertures_args { |
| 124 | struct kfd_process_device_apertures | 128 | struct kfd_process_device_apertures |
| 125 | process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */ | 129 | process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */ |
| @@ -129,6 +133,19 @@ struct kfd_ioctl_get_process_apertures_args { | |||
| 129 | __u32 pad; | 133 | __u32 pad; |
| 130 | }; | 134 | }; |
| 131 | 135 | ||
| 136 | struct kfd_ioctl_get_process_apertures_new_args { | ||
| 137 | /* User allocated. Pointer to struct kfd_process_device_apertures | ||
| 138 | * filled in by Kernel | ||
| 139 | */ | ||
| 140 | __u64 kfd_process_device_apertures_ptr; | ||
| 141 | /* to KFD - indicates amount of memory present in | ||
| 142 | * kfd_process_device_apertures_ptr | ||
| 143 | * from KFD - Number of entries filled by KFD. | ||
| 144 | */ | ||
| 145 | __u32 num_of_nodes; | ||
| 146 | __u32 pad; | ||
| 147 | }; | ||
| 148 | |||
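Unlike the fixed seven-entry array of the deprecated ioctl, the new variant takes a user-allocated buffer plus a count, so it scales to any number of GPUs. A userspace sketch of a two-call pattern, assuming kfd_fd is an open /dev/kfd descriptor and that a zero num_of_nodes is answered with the node count (the latter is an assumption about kernel behavior, not something this header guarantees):

/* Sketch: query process apertures with the new, unbounded ioctl.
 * First call sizes the buffer; second call fills it.
 */
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static struct kfd_process_device_apertures *
get_apertures(int kfd_fd, __u32 *n)
{
        struct kfd_ioctl_get_process_apertures_new_args args = {0};
        struct kfd_process_device_apertures *buf;

        if (ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &args))
                return NULL;
        /* On success, args.num_of_nodes holds the node count. */

        buf = calloc(args.num_of_nodes, sizeof(*buf));
        if (!buf)
                return NULL;

        args.kfd_process_device_apertures_ptr = (__u64)(uintptr_t)buf;
        if (ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &args)) {
                free(buf);
                return NULL;
        }

        *n = args.num_of_nodes;
        return buf;
}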
| 132 | #define MAX_ALLOWED_NUM_POINTS 100 | 149 | #define MAX_ALLOWED_NUM_POINTS 100 |
| 133 | #define MAX_ALLOWED_AW_BUFF_SIZE 4096 | 150 | #define MAX_ALLOWED_AW_BUFF_SIZE 4096 |
| 134 | #define MAX_ALLOWED_WAC_BUFF_SIZE 128 | 151 | #define MAX_ALLOWED_WAC_BUFF_SIZE 128 |
| @@ -269,6 +286,86 @@ struct kfd_ioctl_set_trap_handler_args { | |||
| 269 | __u32 pad; | 286 | __u32 pad; |
| 270 | }; | 287 | }; |
| 271 | 288 | ||
| 289 | struct kfd_ioctl_acquire_vm_args { | ||
| 290 | __u32 drm_fd; /* to KFD */ | ||
| 291 | __u32 gpu_id; /* to KFD */ | ||
| 292 | }; | ||
| 293 | |||
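AMDKFD_IOC_ACQUIRE_VM (defined near the end of this header) hands an open render-node fd to KFD so the process shares that node's GPUVM, which is what the per-GPU memory ioctls below rely on. A minimal sketch, assuming kfd_fd, drm_fd, and gpu_id are obtained by the caller as sketched earlier:

/* Sketch: tie the KFD process to the VM of an open render node.
 * The render node must stay open for as long as the VM is in use.
 */
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int acquire_vm(int kfd_fd, int drm_fd, __u32 gpu_id)
{
        struct kfd_ioctl_acquire_vm_args args = {
                .drm_fd = (__u32)drm_fd,
                .gpu_id = gpu_id,
        };

        return ioctl(kfd_fd, AMDKFD_IOC_ACQUIRE_VM, &args);
}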
| 294 | /* Allocation flags: memory types */ | ||
| 295 | #define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0) | ||
| 296 | #define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1) | ||
| 297 | #define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2) | ||
| 298 | #define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3) | ||
| 299 | /* Allocation flags: attributes/access options */ | ||
| 300 | #define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31) | ||
| 301 | #define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) | ||
| 302 | #define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29) | ||
| 303 | #define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) | ||
| 304 | #define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27) | ||
| 305 | #define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26) | ||
| 306 | |||
| 307 | /* Allocate memory for later SVM (shared virtual memory) mapping. | ||
| 308 | * | ||
| 309 | * @va_addr: virtual address of the memory to be allocated | ||
| 310 | * all later mappings on all GPUs will use this address | ||
| 311 | * @size: size in bytes | ||
| 312 | * @handle: buffer handle returned to user mode, used to refer to | ||
| 313 | * this allocation for mapping, unmapping and freeing | ||
| 314 | * @mmap_offset: for CPU-mapping the allocation by mmapping a render node | ||
| 315 | * for userptrs this is overloaded to specify the CPU address | ||
| 316 | * @gpu_id: device identifier | ||
| 317 | * @flags: memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above | ||
| 318 | */ | ||
| 319 | struct kfd_ioctl_alloc_memory_of_gpu_args { | ||
| 320 | __u64 va_addr; /* to KFD */ | ||
| 321 | __u64 size; /* to KFD */ | ||
| 322 | __u64 handle; /* from KFD */ | ||
| 323 | __u64 mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */ | ||
| 324 | __u32 gpu_id; /* to KFD */ | ||
| 325 | __u32 flags; | ||
| 326 | }; | ||
| 327 | |||
| 328 | /* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu | ||
| 329 | * | ||
| 330 | * @handle: memory handle returned by alloc | ||
| 331 | */ | ||
| 332 | struct kfd_ioctl_free_memory_of_gpu_args { | ||
| 333 | __u64 handle; /* to KFD */ | ||
| 334 | }; | ||
| 335 | |||
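Together, the two structs above form the allocate/free round trip at the core of the new memory path. A minimal sketch of a VRAM allocation and its release; the flag combination and the caller-chosen va_addr are illustrative choices, not requirements:

/* Sketch: allocate device memory at a caller-chosen GPU VA, then
 * free it. WRITABLE|NO_SUBSTITUTE is one plausible flag set for a
 * plain VRAM buffer, not a required one.
 */
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int alloc_and_free_vram(int kfd_fd, __u32 gpu_id,
                               __u64 va_addr, __u64 size)
{
        struct kfd_ioctl_alloc_memory_of_gpu_args alloc = {
                .va_addr = va_addr,
                .size    = size,
                .gpu_id  = gpu_id,
                .flags   = KFD_IOC_ALLOC_MEM_FLAGS_VRAM |
                           KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
                           KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE,
        };
        struct kfd_ioctl_free_memory_of_gpu_args free_args;
        int ret;

        ret = ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &alloc);
        if (ret)
                return ret;

        /* ... map to GPUs, use, unmap (see below) ... */

        free_args.handle = alloc.handle;
        return ioctl(kfd_fd, AMDKFD_IOC_FREE_MEMORY_OF_GPU, &free_args);
}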
| 336 | /* Map memory to one or more GPUs | ||
| 337 | * | ||
| 338 | * @handle: memory handle returned by alloc | ||
| 339 | * @device_ids_array_ptr: array of gpu_ids (__u32 per device) | ||
| 340 | * @n_devices: number of devices in the array | ||
| 341 | * @n_success: number of devices mapped successfully | ||
| 342 | * | ||
| 343 | * @n_success returns information to the caller how many devices from | ||
| 344 | * the start of the array have mapped the buffer successfully. It can | ||
| 345 | * be passed into a subsequent retry call to skip those devices. For | ||
| 346 | * the first call the caller should initialize it to 0. | ||
| 347 | * | ||
| 348 | * If the ioctl completes with return code 0 (success), n_success == | ||
| 349 | * n_devices. | ||
| 350 | */ | ||
| 351 | struct kfd_ioctl_map_memory_to_gpu_args { | ||
| 352 | __u64 handle; /* to KFD */ | ||
| 353 | __u64 device_ids_array_ptr; /* to KFD */ | ||
| 354 | __u32 n_devices; /* to KFD */ | ||
| 355 | __u32 n_success; /* to/from KFD */ | ||
| 356 | }; | ||
| 357 | |||
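The n_success protocol documented above lets a partially failed multi-GPU mapping be retried without touching devices that already succeeded. A sketch of the retry loop it enables; the bounded three-attempt policy is an assumption, not part of the ABI:

/* Sketch: map one buffer to several GPUs, retrying from the first
 * device that has not yet succeeded. n_success starts at 0 and is
 * carried unchanged between attempts; only the kernel updates it.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int map_to_gpus(int kfd_fd, __u64 handle,
                       __u32 *gpu_ids, __u32 n_devices)
{
        struct kfd_ioctl_map_memory_to_gpu_args args = {
                .handle               = handle,
                .device_ids_array_ptr = (__u64)(uintptr_t)gpu_ids,
                .n_devices            = n_devices,
                .n_success            = 0,
        };
        int tries, ret = -1;

        for (tries = 0; tries < 3 && ret; tries++)
                ret = ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &args);

        return ret;     /* 0 on success; args.n_success == n_devices */
}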
| 358 | /* Unmap memory from one or more GPUs | ||
| 359 | * | ||
| 360 | * same arguments as for mapping | ||
| 361 | */ | ||
| 362 | struct kfd_ioctl_unmap_memory_from_gpu_args { | ||
| 363 | __u64 handle; /* to KFD */ | ||
| 364 | __u64 device_ids_array_ptr; /* to KFD */ | ||
| 365 | __u32 n_devices; /* to KFD */ | ||
| 366 | __u32 n_success; /* to/from KFD */ | ||
| 367 | }; | ||
| 368 | |||
| 272 | #define AMDKFD_IOCTL_BASE 'K' | 369 | #define AMDKFD_IOCTL_BASE 'K' |
| 273 | #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) | 370 | #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) |
| 274 | #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) | 371 | #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) |
| @@ -332,7 +429,26 @@ struct kfd_ioctl_set_trap_handler_args { | |||
| 332 | #define AMDKFD_IOC_SET_TRAP_HANDLER \ | 429 | #define AMDKFD_IOC_SET_TRAP_HANDLER \ |
| 333 | AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args) | 430 | AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args) |
| 334 | 431 | ||
| 432 | #define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \ | ||
| 433 | AMDKFD_IOWR(0x14, \ | ||
| 434 | struct kfd_ioctl_get_process_apertures_new_args) | ||
| 435 | |||
| 436 | #define AMDKFD_IOC_ACQUIRE_VM \ | ||
| 437 | AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args) | ||
| 438 | |||
| 439 | #define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \ | ||
| 440 | AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args) | ||
| 441 | |||
| 442 | #define AMDKFD_IOC_FREE_MEMORY_OF_GPU \ | ||
| 443 | AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args) | ||
| 444 | |||
| 445 | #define AMDKFD_IOC_MAP_MEMORY_TO_GPU \ | ||
| 446 | AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args) | ||
| 447 | |||
| 448 | #define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \ | ||
| 449 | AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args) | ||
| 450 | |||
| 335 | #define AMDKFD_COMMAND_START 0x01 | 451 | #define AMDKFD_COMMAND_START 0x01 |
| 336 | #define AMDKFD_COMMAND_END 0x14 | 452 | #define AMDKFD_COMMAND_END 0x1A |
| 337 | 453 | ||
| 338 | #endif | 454 | #endif |
