author    Dave Airlie <airlied@redhat.com>  2018-03-28 00:49:19 -0400
committer Dave Airlie <airlied@redhat.com>  2018-03-28 00:49:19 -0400
commit    9f36f9c8eed847ee8920ecac689bbf3dd4660774
tree      88377565adc8d3261f90f07986c3234571a05601
parent    cb17aa521e0e00db92463ad306660f3f23ae9657
parent    1679ae8f8f4148766423066aeb3dbb0a985a373a
Merge tag 'drm-amdkfd-next-2018-03-27' of git://people.freedesktop.org/~gabbayo/linux into drm-next
- GPUVM support for dGPUs
- KFD events support for dGPUs
- Fix live-lock situation when restoring multiple evicted processes
- Fix VM page table allocation on large-BAR systems
- Fix for build failure on frv architecture

* tag 'drm-amdkfd-next-2018-03-27' of git://people.freedesktop.org/~gabbayo/linux:
  drm/amdkfd: Use ordered workqueue to restore processes
  drm/amdgpu: Fix acquiring VM on large-BAR systems
  drm/amdkfd: Add module option for testing large-BAR functionality
  drm/amdkfd: Kmap event page for dGPUs
  drm/amdkfd: Add ioctls for GPUVM memory management
  drm/amdkfd: Add TC flush on VMID deallocation for Hawaii
  drm/amdkfd: Allocate CWSR trap handler memory for dGPUs
  drm/amdkfd: Add per-process IDR for buffer handles
  drm/amdkfd: Aperture setup for dGPUs
  drm/amdkfd: Remove limit on number of GPUs
  drm/amdkfd: Populate DRM render device minor
  drm/amdkfd: Create KFD VMs on demand
  drm/amdgpu: Add kfd2kgd interface to acquire an existing VM
  drm/amdgpu: Add helper to turn an existing VM into a compute VM
  drm/amdgpu: Fix initial validation of PD BO for KFD VMs
  drm/amdgpu: Move KFD-specific fields into struct amdgpu_vm
  drm/amdkfd: fix uninitialized variable use
  drm/amdkfd: add missing include of mm.h
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h             |  28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c      |   1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c      |   1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c       | 249
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c                 |  73
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h                 |  10
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c               | 532
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.c                  |   5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  |  22
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_events.c                |  31
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c           |  59
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_module.c                |  11
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c        |  37
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h                  |  39
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process.c               | 334
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c              |   4
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.h              |   1
-rw-r--r--  drivers/gpu/drm/amd/include/kgd_kfd_interface.h        |   4
-rw-r--r--  include/uapi/linux/kfd_ioctl.h                         | 122
19 files changed, 1398 insertions, 165 deletions
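
Before the individual diffs, a brief illustration of how the new GPUVM memory-management ioctls fit together from user space. This is a minimal sketch, not taken from the series: the struct, ioctl and flag names come from the kfd_chardev.c hunks below and the updated include/uapi/linux/kfd_ioctl.h listed in the diffstat, while the device paths, the gpu_id value and the error handling are simplified assumptions (a real client such as the ROCm thunk discovers gpu_id through the topology and calls AMDKFD_IOC_GET_PROCESS_APERTURES_NEW first).

/*
 * Hypothetical user-space sketch of the new GPUVM ioctls added by this
 * pull (AMDKFD_IOC_ACQUIRE_VM, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
 * AMDKFD_IOC_MAP_MEMORY_TO_GPU). Paths and the VA are made-up example
 * values; see the kfd_chardev.c changes below for the kernel side.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

int example_map_vram(uint32_t gpu_id, uint64_t va, uint64_t size)
{
	int kfd_fd = open("/dev/kfd", O_RDWR);            /* KFD device node */
	int drm_fd = open("/dev/dri/renderD128", O_RDWR); /* render node of the same GPU (assumed) */

	if (kfd_fd < 0 || drm_fd < 0)
		return -1;

	/* Turn the render node's GFX VM into a compute VM for this process */
	struct kfd_ioctl_acquire_vm_args acquire = {
		.gpu_id = gpu_id,
		.drm_fd = (uint32_t)drm_fd,
	};
	if (ioctl(kfd_fd, AMDKFD_IOC_ACQUIRE_VM, &acquire))
		return -1;

	/* Allocate CPU-visible VRAM (only allowed on large-BAR systems) */
	struct kfd_ioctl_alloc_memory_of_gpu_args alloc = {
		.va_addr = va,
		.size = size,
		.gpu_id = gpu_id,
		.flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM |
			 KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC,
	};
	if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &alloc))
		return -1;

	/* Map the buffer into the GPUVM of the allocating device */
	uint32_t devices[1] = { gpu_id };
	struct kfd_ioctl_map_memory_to_gpu_args map = {
		.handle = alloc.handle,
		.device_ids_array_ptr = (uint64_t)(uintptr_t)devices,
		.n_devices = 1,
		.n_success = 0,
	};
	if (ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &map))
		return -1;

	printf("mapped handle 0x%llx at GPU VA 0x%llx\n",
	       (unsigned long long)alloc.handle, (unsigned long long)va);
	return 0;
}

The acquire step is what replaces the old create_process_vm path for dGPUs: the process hands KFD an open render-node fd, and amdgpu converts that file's GFX VM into a compute VM instead of creating a second one.
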
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index d7509b706b26..c2c2bea731e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -26,6 +26,7 @@
26#define AMDGPU_AMDKFD_H_INCLUDED 26#define AMDGPU_AMDKFD_H_INCLUDED
27 27
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/mm.h>
29#include <linux/mmu_context.h> 30#include <linux/mmu_context.h>
30#include <kgd_kfd_interface.h> 31#include <kgd_kfd_interface.h>
31#include <drm/ttm/ttm_execbuf_util.h> 32#include <drm/ttm/ttm_execbuf_util.h>
@@ -92,27 +93,6 @@ struct amdkfd_process_info {
92 struct amdgpu_amdkfd_fence *eviction_fence; 93 struct amdgpu_amdkfd_fence *eviction_fence;
93}; 94};
94 95
95/* struct amdkfd_vm -
96 * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs
97 * belonging to a KFD process. All the VMs belonging to the same process point
98 * to the same amdkfd_process_info.
99 */
100struct amdkfd_vm {
101 /* Keep base as the first parameter for pointer compatibility between
102 * amdkfd_vm and amdgpu_vm.
103 */
104 struct amdgpu_vm base;
105
106 /* List node in amdkfd_process_info.vm_list_head*/
107 struct list_head vm_list_node;
108
109 struct amdgpu_device *adev;
110 /* Points to the KFD process VM info*/
111 struct amdkfd_process_info *process_info;
112
113 uint64_t pd_phys_addr;
114};
115
116int amdgpu_amdkfd_init(void); 96int amdgpu_amdkfd_init(void);
117void amdgpu_amdkfd_fini(void); 97void amdgpu_amdkfd_fini(void);
118 98
@@ -165,6 +145,12 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
165int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, 145int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
166 void **process_info, 146 void **process_info,
167 struct dma_fence **ef); 147 struct dma_fence **ef);
148int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
149 struct file *filp,
150 void **vm, void **process_info,
151 struct dma_fence **ef);
152void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
153 struct amdgpu_vm *vm);
168void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); 154void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
169uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); 155uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
170int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( 156int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 7485c376b90e..ea54e53172b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -205,6 +205,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
205 .get_cu_info = get_cu_info, 205 .get_cu_info = get_cu_info,
206 .get_vram_usage = amdgpu_amdkfd_get_vram_usage, 206 .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
207 .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, 207 .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
208 .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
208 .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, 209 .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
209 .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, 210 .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
210 .set_vm_context_page_table_base = set_vm_context_page_table_base, 211 .set_vm_context_page_table_base = set_vm_context_page_table_base,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 7be453494423..89264c9a5e9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -165,6 +165,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
165 .get_cu_info = get_cu_info, 165 .get_cu_info = get_cu_info,
166 .get_vram_usage = amdgpu_amdkfd_get_vram_usage, 166 .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
167 .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, 167 .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
168 .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
168 .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, 169 .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
169 .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, 170 .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
170 .set_vm_context_page_table_base = set_vm_context_page_table_base, 171 .set_vm_context_page_table_base = set_vm_context_page_table_base,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a12a1654e124..1d6e1479da38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -333,9 +333,9 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
333 * again. Page directories are only updated after updating page 333 * again. Page directories are only updated after updating page
334 * tables. 334 * tables.
335 */ 335 */
336static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm) 336static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
337{ 337{
338 struct amdgpu_bo *pd = vm->base.root.base.bo; 338 struct amdgpu_bo *pd = vm->root.base.bo;
339 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); 339 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
340 struct amdgpu_vm_parser param; 340 struct amdgpu_vm_parser param;
341 uint64_t addr, flags = AMDGPU_PTE_VALID; 341 uint64_t addr, flags = AMDGPU_PTE_VALID;
@@ -344,7 +344,7 @@ static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm)
344 param.domain = AMDGPU_GEM_DOMAIN_VRAM; 344 param.domain = AMDGPU_GEM_DOMAIN_VRAM;
345 param.wait = false; 345 param.wait = false;
346 346
347 ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate, 347 ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
348 &param); 348 &param);
349 if (ret) { 349 if (ret) {
350 pr_err("amdgpu: failed to validate PT BOs\n"); 350 pr_err("amdgpu: failed to validate PT BOs\n");
@@ -357,11 +357,11 @@ static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm)
357 return ret; 357 return ret;
358 } 358 }
359 359
360 addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo); 360 addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
361 amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags); 361 amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
362 vm->pd_phys_addr = addr; 362 vm->pd_phys_addr = addr;
363 363
364 if (vm->base.use_cpu_for_update) { 364 if (vm->use_cpu_for_update) {
365 ret = amdgpu_bo_kmap(pd, NULL); 365 ret = amdgpu_bo_kmap(pd, NULL);
366 if (ret) { 366 if (ret) {
367 pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret); 367 pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
@@ -415,14 +415,12 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
415 * 4a. Validate new page tables and directories 415 * 4a. Validate new page tables and directories
416 */ 416 */
417static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, 417static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
418 struct amdgpu_vm *avm, bool is_aql, 418 struct amdgpu_vm *vm, bool is_aql,
419 struct kfd_bo_va_list **p_bo_va_entry) 419 struct kfd_bo_va_list **p_bo_va_entry)
420{ 420{
421 int ret; 421 int ret;
422 struct kfd_bo_va_list *bo_va_entry; 422 struct kfd_bo_va_list *bo_va_entry;
423 struct amdkfd_vm *kvm = container_of(avm, 423 struct amdgpu_bo *pd = vm->root.base.bo;
424 struct amdkfd_vm, base);
425 struct amdgpu_bo *pd = avm->root.base.bo;
426 struct amdgpu_bo *bo = mem->bo; 424 struct amdgpu_bo *bo = mem->bo;
427 uint64_t va = mem->va; 425 uint64_t va = mem->va;
428 struct list_head *list_bo_va = &mem->bo_va_list; 426 struct list_head *list_bo_va = &mem->bo_va_list;
@@ -441,10 +439,10 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
441 return -ENOMEM; 439 return -ENOMEM;
442 440
443 pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, 441 pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
444 va + bo_size, avm); 442 va + bo_size, vm);
445 443
446 /* Add BO to VM internal data structures*/ 444 /* Add BO to VM internal data structures*/
447 bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo); 445 bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
448 if (!bo_va_entry->bo_va) { 446 if (!bo_va_entry->bo_va) {
449 ret = -EINVAL; 447 ret = -EINVAL;
450 pr_err("Failed to add BO object to VM. ret == %d\n", 448 pr_err("Failed to add BO object to VM. ret == %d\n",
@@ -467,28 +465,28 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
467 * fence, so remove it temporarily. 465 * fence, so remove it temporarily.
468 */ 466 */
469 amdgpu_amdkfd_remove_eviction_fence(pd, 467 amdgpu_amdkfd_remove_eviction_fence(pd,
470 kvm->process_info->eviction_fence, 468 vm->process_info->eviction_fence,
471 NULL, NULL); 469 NULL, NULL);
472 470
473 ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo)); 471 ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
474 if (ret) { 472 if (ret) {
475 pr_err("Failed to allocate pts, err=%d\n", ret); 473 pr_err("Failed to allocate pts, err=%d\n", ret);
476 goto err_alloc_pts; 474 goto err_alloc_pts;
477 } 475 }
478 476
479 ret = vm_validate_pt_pd_bos(kvm); 477 ret = vm_validate_pt_pd_bos(vm);
480 if (ret) { 478 if (ret) {
481 pr_err("validate_pt_pd_bos() failed\n"); 479 pr_err("validate_pt_pd_bos() failed\n");
482 goto err_alloc_pts; 480 goto err_alloc_pts;
483 } 481 }
484 482
485 /* Add the eviction fence back */ 483 /* Add the eviction fence back */
486 amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); 484 amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
487 485
488 return 0; 486 return 0;
489 487
490err_alloc_pts: 488err_alloc_pts:
491 amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); 489 amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
492 amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va); 490 amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
493 list_del(&bo_va_entry->bo_list); 491 list_del(&bo_va_entry->bo_list);
494err_vmadd: 492err_vmadd:
@@ -703,7 +701,6 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
703{ 701{
704 struct amdgpu_bo_va *bo_va = entry->bo_va; 702 struct amdgpu_bo_va *bo_va = entry->bo_va;
705 struct amdgpu_vm *vm = bo_va->base.vm; 703 struct amdgpu_vm *vm = bo_va->base.vm;
706 struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base);
707 struct amdgpu_bo *pd = vm->root.base.bo; 704 struct amdgpu_bo *pd = vm->root.base.bo;
708 705
709 /* Remove eviction fence from PD (and thereby from PTs too as 706 /* Remove eviction fence from PD (and thereby from PTs too as
@@ -713,14 +710,14 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
713 * trigger the eviction fence. 710 * trigger the eviction fence.
714 */ 711 */
715 amdgpu_amdkfd_remove_eviction_fence(pd, 712 amdgpu_amdkfd_remove_eviction_fence(pd,
716 kvm->process_info->eviction_fence, 713 vm->process_info->eviction_fence,
717 NULL, NULL); 714 NULL, NULL);
718 amdgpu_vm_bo_unmap(adev, bo_va, entry->va); 715 amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
719 716
720 amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); 717 amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
721 718
722 /* Add the eviction fence back */ 719 /* Add the eviction fence back */
723 amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); 720 amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
724 721
725 sync_vm_fence(adev, sync, bo_va->last_pt_update); 722 sync_vm_fence(adev, sync, bo_va->last_pt_update);
726 723
@@ -780,7 +777,7 @@ update_gpuvm_pte_failed:
780 777
781static int process_validate_vms(struct amdkfd_process_info *process_info) 778static int process_validate_vms(struct amdkfd_process_info *process_info)
782{ 779{
783 struct amdkfd_vm *peer_vm; 780 struct amdgpu_vm *peer_vm;
784 int ret; 781 int ret;
785 782
786 list_for_each_entry(peer_vm, &process_info->vm_list_head, 783 list_for_each_entry(peer_vm, &process_info->vm_list_head,
@@ -796,12 +793,12 @@ static int process_validate_vms(struct amdkfd_process_info *process_info)
796static int process_update_pds(struct amdkfd_process_info *process_info, 793static int process_update_pds(struct amdkfd_process_info *process_info,
797 struct amdgpu_sync *sync) 794 struct amdgpu_sync *sync)
798{ 795{
799 struct amdkfd_vm *peer_vm; 796 struct amdgpu_vm *peer_vm;
800 int ret; 797 int ret;
801 798
802 list_for_each_entry(peer_vm, &process_info->vm_list_head, 799 list_for_each_entry(peer_vm, &process_info->vm_list_head,
803 vm_list_node) { 800 vm_list_node) {
804 ret = vm_update_pds(&peer_vm->base, sync); 801 ret = vm_update_pds(peer_vm, sync);
805 if (ret) 802 if (ret)
806 return ret; 803 return ret;
807 } 804 }
@@ -809,33 +806,16 @@ static int process_update_pds(struct amdkfd_process_info *process_info,
809 return 0; 806 return 0;
810} 807}
811 808
812int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, 809static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
813 void **process_info, 810 struct dma_fence **ef)
814 struct dma_fence **ef)
815{ 811{
812 struct amdkfd_process_info *info = NULL;
816 int ret; 813 int ret;
817 struct amdkfd_vm *new_vm;
818 struct amdkfd_process_info *info;
819 struct amdgpu_device *adev = get_amdgpu_device(kgd);
820
821 new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
822 if (!new_vm)
823 return -ENOMEM;
824
825 /* Initialize the VM context, allocate the page directory and zero it */
826 ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0);
827 if (ret) {
828 pr_err("Failed init vm ret %d\n", ret);
829 goto vm_init_fail;
830 }
831 new_vm->adev = adev;
832 814
833 if (!*process_info) { 815 if (!*process_info) {
834 info = kzalloc(sizeof(*info), GFP_KERNEL); 816 info = kzalloc(sizeof(*info), GFP_KERNEL);
835 if (!info) { 817 if (!info)
836 ret = -ENOMEM; 818 return -ENOMEM;
837 goto alloc_process_info_fail;
838 }
839 819
840 mutex_init(&info->lock); 820 mutex_init(&info->lock);
841 INIT_LIST_HEAD(&info->vm_list_head); 821 INIT_LIST_HEAD(&info->vm_list_head);
@@ -846,6 +826,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
846 current->mm); 826 current->mm);
847 if (!info->eviction_fence) { 827 if (!info->eviction_fence) {
848 pr_err("Failed to create eviction fence\n"); 828 pr_err("Failed to create eviction fence\n");
829 ret = -ENOMEM;
849 goto create_evict_fence_fail; 830 goto create_evict_fence_fail;
850 } 831 }
851 832
@@ -853,57 +834,137 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
853 *ef = dma_fence_get(&info->eviction_fence->base); 834 *ef = dma_fence_get(&info->eviction_fence->base);
854 } 835 }
855 836
856 new_vm->process_info = *process_info; 837 vm->process_info = *process_info;
857
858 mutex_lock(&new_vm->process_info->lock);
859 list_add_tail(&new_vm->vm_list_node,
860 &(new_vm->process_info->vm_list_head));
861 new_vm->process_info->n_vms++;
862 mutex_unlock(&new_vm->process_info->lock);
863 838
864 *vm = (void *) new_vm; 839 /* Validate page directory and attach eviction fence */
840 ret = amdgpu_bo_reserve(vm->root.base.bo, true);
841 if (ret)
842 goto reserve_pd_fail;
843 ret = vm_validate_pt_pd_bos(vm);
844 if (ret) {
845 pr_err("validate_pt_pd_bos() failed\n");
846 goto validate_pd_fail;
847 }
848 ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false);
849 if (ret)
850 goto wait_pd_fail;
851 amdgpu_bo_fence(vm->root.base.bo,
852 &vm->process_info->eviction_fence->base, true);
853 amdgpu_bo_unreserve(vm->root.base.bo);
854
855 /* Update process info */
856 mutex_lock(&vm->process_info->lock);
857 list_add_tail(&vm->vm_list_node,
858 &(vm->process_info->vm_list_head));
859 vm->process_info->n_vms++;
860 mutex_unlock(&vm->process_info->lock);
865 861
866 pr_debug("Created process vm %p\n", *vm); 862 return 0;
867 863
864wait_pd_fail:
865validate_pd_fail:
866 amdgpu_bo_unreserve(vm->root.base.bo);
867reserve_pd_fail:
868 vm->process_info = NULL;
869 if (info) {
870 /* Two fence references: one in info and one in *ef */
871 dma_fence_put(&info->eviction_fence->base);
872 dma_fence_put(*ef);
873 *ef = NULL;
874 *process_info = NULL;
875create_evict_fence_fail:
876 mutex_destroy(&info->lock);
877 kfree(info);
878 }
868 return ret; 879 return ret;
880}
869 881
870create_evict_fence_fail: 882int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
871 mutex_destroy(&info->lock); 883 void **process_info,
872 kfree(info); 884 struct dma_fence **ef)
873alloc_process_info_fail: 885{
874 amdgpu_vm_fini(adev, &new_vm->base); 886 struct amdgpu_device *adev = get_amdgpu_device(kgd);
875vm_init_fail: 887 struct amdgpu_vm *new_vm;
888 int ret;
889
890 new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
891 if (!new_vm)
892 return -ENOMEM;
893
894 /* Initialize AMDGPU part of the VM */
895 ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
896 if (ret) {
897 pr_err("Failed init vm ret %d\n", ret);
898 goto amdgpu_vm_init_fail;
899 }
900
901 /* Initialize KFD part of the VM and process info */
902 ret = init_kfd_vm(new_vm, process_info, ef);
903 if (ret)
904 goto init_kfd_vm_fail;
905
906 *vm = (void *) new_vm;
907
908 return 0;
909
910init_kfd_vm_fail:
911 amdgpu_vm_fini(adev, new_vm);
912amdgpu_vm_init_fail:
876 kfree(new_vm); 913 kfree(new_vm);
877 return ret; 914 return ret;
878
879} 915}
880 916
881void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) 917int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
918 struct file *filp,
919 void **vm, void **process_info,
920 struct dma_fence **ef)
882{ 921{
883 struct amdgpu_device *adev = get_amdgpu_device(kgd); 922 struct amdgpu_device *adev = get_amdgpu_device(kgd);
884 struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm; 923 struct drm_file *drm_priv = filp->private_data;
885 struct amdgpu_vm *avm = &kfd_vm->base; 924 struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
886 struct amdgpu_bo *pd; 925 struct amdgpu_vm *avm = &drv_priv->vm;
887 struct amdkfd_process_info *process_info; 926 int ret;
888 927
889 if (WARN_ON(!kgd || !vm)) 928 /* Already a compute VM? */
929 if (avm->process_info)
930 return -EINVAL;
931
932 /* Convert VM into a compute VM */
933 ret = amdgpu_vm_make_compute(adev, avm);
934 if (ret)
935 return ret;
936
937 /* Initialize KFD part of the VM and process info */
938 ret = init_kfd_vm(avm, process_info, ef);
939 if (ret)
940 return ret;
941
942 *vm = (void *)avm;
943
944 return 0;
945}
946
947void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
948 struct amdgpu_vm *vm)
949{
950 struct amdkfd_process_info *process_info = vm->process_info;
951 struct amdgpu_bo *pd = vm->root.base.bo;
952
953 if (!process_info)
890 return; 954 return;
891 955
892 pr_debug("Destroying process vm %p\n", vm);
893 /* Release eviction fence from PD */ 956 /* Release eviction fence from PD */
894 pd = avm->root.base.bo;
895 amdgpu_bo_reserve(pd, false); 957 amdgpu_bo_reserve(pd, false);
896 amdgpu_bo_fence(pd, NULL, false); 958 amdgpu_bo_fence(pd, NULL, false);
897 amdgpu_bo_unreserve(pd); 959 amdgpu_bo_unreserve(pd);
898 960
899 process_info = kfd_vm->process_info; 961 /* Update process info */
900
901 mutex_lock(&process_info->lock); 962 mutex_lock(&process_info->lock);
902 process_info->n_vms--; 963 process_info->n_vms--;
903 list_del(&kfd_vm->vm_list_node); 964 list_del(&vm->vm_list_node);
904 mutex_unlock(&process_info->lock); 965 mutex_unlock(&process_info->lock);
905 966
906 /* Release per-process resources */ 967 /* Release per-process resources when last compute VM is destroyed */
907 if (!process_info->n_vms) { 968 if (!process_info->n_vms) {
908 WARN_ON(!list_empty(&process_info->kfd_bo_list)); 969 WARN_ON(!list_empty(&process_info->kfd_bo_list));
909 970
@@ -911,6 +972,17 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
911 mutex_destroy(&process_info->lock); 972 mutex_destroy(&process_info->lock);
912 kfree(process_info); 973 kfree(process_info);
913 } 974 }
975}
976
977void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
978{
979 struct amdgpu_device *adev = get_amdgpu_device(kgd);
980 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
981
982 if (WARN_ON(!kgd || !vm))
983 return;
984
985 pr_debug("Destroying process vm %p\n", vm);
914 986
915 /* Release the VM context */ 987 /* Release the VM context */
916 amdgpu_vm_fini(adev, avm); 988 amdgpu_vm_fini(adev, avm);
@@ -919,7 +991,7 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
919 991
920uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) 992uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
921{ 993{
922 struct amdkfd_vm *avm = (struct amdkfd_vm *)vm; 994 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
923 995
924 return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; 996 return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
925} 997}
@@ -930,7 +1002,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
930 uint64_t *offset, uint32_t flags) 1002 uint64_t *offset, uint32_t flags)
931{ 1003{
932 struct amdgpu_device *adev = get_amdgpu_device(kgd); 1004 struct amdgpu_device *adev = get_amdgpu_device(kgd);
933 struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; 1005 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
934 struct amdgpu_bo *bo; 1006 struct amdgpu_bo *bo;
935 int byte_align; 1007 int byte_align;
936 u32 alloc_domain; 1008 u32 alloc_domain;
@@ -1010,8 +1082,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1010 (*mem)->va = va; 1082 (*mem)->va = va;
1011 (*mem)->domain = alloc_domain; 1083 (*mem)->domain = alloc_domain;
1012 (*mem)->mapped_to_gpu_memory = 0; 1084 (*mem)->mapped_to_gpu_memory = 0;
1013 (*mem)->process_info = kfd_vm->process_info; 1085 (*mem)->process_info = avm->process_info;
1014 add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info); 1086 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
1015 1087
1016 if (offset) 1088 if (offset)
1017 *offset = amdgpu_bo_mmap_offset(bo); 1089 *offset = amdgpu_bo_mmap_offset(bo);
@@ -1092,7 +1164,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1092 struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) 1164 struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
1093{ 1165{
1094 struct amdgpu_device *adev = get_amdgpu_device(kgd); 1166 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1095 struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; 1167 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1096 int ret; 1168 int ret;
1097 struct amdgpu_bo *bo; 1169 struct amdgpu_bo *bo;
1098 uint32_t domain; 1170 uint32_t domain;
@@ -1128,19 +1200,19 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1128 if (unlikely(ret)) 1200 if (unlikely(ret))
1129 goto out; 1201 goto out;
1130 1202
1131 if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) { 1203 if (check_if_add_bo_to_vm(avm, mem)) {
1132 ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false, 1204 ret = add_bo_to_vm(adev, mem, avm, false,
1133 &bo_va_entry); 1205 &bo_va_entry);
1134 if (ret) 1206 if (ret)
1135 goto add_bo_to_vm_failed; 1207 goto add_bo_to_vm_failed;
1136 if (mem->aql_queue) { 1208 if (mem->aql_queue) {
1137 ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, 1209 ret = add_bo_to_vm(adev, mem, avm,
1138 true, &bo_va_entry_aql); 1210 true, &bo_va_entry_aql);
1139 if (ret) 1211 if (ret)
1140 goto add_bo_to_vm_failed_aql; 1212 goto add_bo_to_vm_failed_aql;
1141 } 1213 }
1142 } else { 1214 } else {
1143 ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); 1215 ret = vm_validate_pt_pd_bos(avm);
1144 if (unlikely(ret)) 1216 if (unlikely(ret))
1145 goto add_bo_to_vm_failed; 1217 goto add_bo_to_vm_failed;
1146 } 1218 }
@@ -1184,7 +1256,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1184 1256
1185 if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) 1257 if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count)
1186 amdgpu_bo_fence(bo, 1258 amdgpu_bo_fence(bo,
1187 &kfd_vm->process_info->eviction_fence->base, 1259 &avm->process_info->eviction_fence->base,
1188 true); 1260 true);
1189 ret = unreserve_bo_and_vms(&ctx, false, false); 1261 ret = unreserve_bo_and_vms(&ctx, false, false);
1190 1262
@@ -1209,7 +1281,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1209{ 1281{
1210 struct amdgpu_device *adev = get_amdgpu_device(kgd); 1282 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1211 struct amdkfd_process_info *process_info = 1283 struct amdkfd_process_info *process_info =
1212 ((struct amdkfd_vm *)vm)->process_info; 1284 ((struct amdgpu_vm *)vm)->process_info;
1213 unsigned long bo_size = mem->bo->tbo.mem.size; 1285 unsigned long bo_size = mem->bo->tbo.mem.size;
1214 struct kfd_bo_va_list *entry; 1286 struct kfd_bo_va_list *entry;
1215 struct bo_vm_reservation_context ctx; 1287 struct bo_vm_reservation_context ctx;
@@ -1226,7 +1298,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1226 goto unreserve_out; 1298 goto unreserve_out;
1227 } 1299 }
1228 1300
1229 ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); 1301 ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
1230 if (unlikely(ret)) 1302 if (unlikely(ret))
1231 goto unreserve_out; 1303 goto unreserve_out;
1232 1304
@@ -1368,7 +1440,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
1368{ 1440{
1369 struct amdgpu_bo_list_entry *pd_bo_list; 1441 struct amdgpu_bo_list_entry *pd_bo_list;
1370 struct amdkfd_process_info *process_info = info; 1442 struct amdkfd_process_info *process_info = info;
1371 struct amdkfd_vm *peer_vm; 1443 struct amdgpu_vm *peer_vm;
1372 struct kgd_mem *mem; 1444 struct kgd_mem *mem;
1373 struct bo_vm_reservation_context ctx; 1445 struct bo_vm_reservation_context ctx;
1374 struct amdgpu_amdkfd_fence *new_fence; 1446 struct amdgpu_amdkfd_fence *new_fence;
@@ -1390,8 +1462,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
1390 mutex_lock(&process_info->lock); 1462 mutex_lock(&process_info->lock);
1391 list_for_each_entry(peer_vm, &process_info->vm_list_head, 1463 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1392 vm_list_node) 1464 vm_list_node)
1393 amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list, 1465 amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);
1394 &pd_bo_list[i++]);
1395 1466
1396 /* Reserve all BOs and page tables/directory. Add all BOs from 1467 /* Reserve all BOs and page tables/directory. Add all BOs from
1397 * kfd_bo_list to ctx.list 1468 * kfd_bo_list to ctx.list
@@ -1422,7 +1493,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
1422 /* FIXME: I think this isn't needed */ 1493 /* FIXME: I think this isn't needed */
1423 list_for_each_entry(peer_vm, &process_info->vm_list_head, 1494 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1424 vm_list_node) { 1495 vm_list_node) {
1425 struct amdgpu_bo *bo = peer_vm->base.root.base.bo; 1496 struct amdgpu_bo *bo = peer_vm->root.base.bo;
1426 1497
1427 ttm_bo_wait(&bo->tbo, false, false); 1498 ttm_bo_wait(&bo->tbo, false, false);
1428 } 1499 }
@@ -1491,7 +1562,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
1491 /* Attach eviction fence to PD / PT BOs */ 1562 /* Attach eviction fence to PD / PT BOs */
1492 list_for_each_entry(peer_vm, &process_info->vm_list_head, 1563 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1493 vm_list_node) { 1564 vm_list_node) {
1494 struct amdgpu_bo *bo = peer_vm->base.root.base.bo; 1565 struct amdgpu_bo *bo = peer_vm->root.base.bo;
1495 1566
1496 amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); 1567 amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
1497 } 1568 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 24474294c92a..da55a78d7380 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -32,6 +32,7 @@
32#include <drm/amdgpu_drm.h> 32#include <drm/amdgpu_drm.h>
33#include "amdgpu.h" 33#include "amdgpu.h"
34#include "amdgpu_trace.h" 34#include "amdgpu_trace.h"
35#include "amdgpu_amdkfd.h"
35 36
36/* 37/*
37 * GPUVM 38 * GPUVM
@@ -2405,8 +2406,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2405 if (vm->use_cpu_for_update) 2406 if (vm->use_cpu_for_update)
2406 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; 2407 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
2407 else 2408 else
2408 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | 2409 flags |= AMDGPU_GEM_CREATE_SHADOW;
2409 AMDGPU_GEM_CREATE_SHADOW);
2410 2410
2411 size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); 2411 size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
2412 r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags, 2412 r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
@@ -2462,6 +2462,73 @@ error_free_sched_entity:
2462} 2462}
2463 2463
2464/** 2464/**
2465 * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
2466 *
2467 * This only works on GFX VMs that don't have any BOs added and no
2468 * page tables allocated yet.
2469 *
2470 * Changes the following VM parameters:
2471 * - use_cpu_for_update
2472 * - pte_supports_ats
2473 * - pasid (old PASID is released, because compute manages its own PASIDs)
2474 *
2475 * Reinitializes the page directory to reflect the changed ATS
2476 * setting. May leave behind an unused shadow BO for the page
2477 * directory when switching from SDMA updates to CPU updates.
2478 *
2479 * Returns 0 for success, -errno for errors.
2480 */
2481int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2482{
2483 bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
2484 int r;
2485
2486 r = amdgpu_bo_reserve(vm->root.base.bo, true);
2487 if (r)
2488 return r;
2489
2490 /* Sanity checks */
2491 if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
2492 r = -EINVAL;
2493 goto error;
2494 }
2495
2496 /* Check if PD needs to be reinitialized and do it before
2497 * changing any other state, in case it fails.
2498 */
2499 if (pte_support_ats != vm->pte_support_ats) {
2500 r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
2501 adev->vm_manager.root_level,
2502 pte_support_ats);
2503 if (r)
2504 goto error;
2505 }
2506
2507 /* Update VM state */
2508 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2509 AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2510 vm->pte_support_ats = pte_support_ats;
2511 DRM_DEBUG_DRIVER("VM update mode is %s\n",
2512 vm->use_cpu_for_update ? "CPU" : "SDMA");
2513 WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
2514 "CPU update of VM recommended only for large BAR system\n");
2515
2516 if (vm->pasid) {
2517 unsigned long flags;
2518
2519 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2520 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
2521 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2522
2523 vm->pasid = 0;
2524 }
2525
2526error:
2527 amdgpu_bo_unreserve(vm->root.base.bo);
2528 return r;
2529}
2530
2531/**
2465 * amdgpu_vm_free_levels - free PD/PT levels 2532 * amdgpu_vm_free_levels - free PD/PT levels
2466 * 2533 *
2467 * @adev: amdgpu device structure 2534 * @adev: amdgpu device structure
@@ -2508,6 +2575,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2508 u64 fault; 2575 u64 fault;
2509 int i, r; 2576 int i, r;
2510 2577
2578 amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
2579
2511 /* Clear pending page faults from IH when the VM is destroyed */ 2580 /* Clear pending page faults from IH when the VM is destroyed */
2512 while (kfifo_get(&vm->faults, &fault)) 2581 while (kfifo_get(&vm->faults, &fault))
2513 amdgpu_ih_clear_fault(adev, fault); 2582 amdgpu_ih_clear_fault(adev, fault);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index e9841518343e..30f080364c97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -207,6 +207,15 @@ struct amdgpu_vm {
207 207
208 /* Limit non-retry fault storms */ 208 /* Limit non-retry fault storms */
209 unsigned int fault_credit; 209 unsigned int fault_credit;
210
211 /* Points to the KFD process VM info */
212 struct amdkfd_process_info *process_info;
213
214 /* List node in amdkfd_process_info.vm_list_head */
215 struct list_head vm_list_node;
216
217 /* Valid while the PD is reserved or fenced */
218 uint64_t pd_phys_addr;
210}; 219};
211 220
212struct amdgpu_vm_manager { 221struct amdgpu_vm_manager {
@@ -251,6 +260,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
251void amdgpu_vm_manager_fini(struct amdgpu_device *adev); 260void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
252int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, 261int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
253 int vm_context, unsigned int pasid); 262 int vm_context, unsigned int pasid);
263int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
254void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); 264void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
255bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, 265bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
256 unsigned int pasid); 266 unsigned int pasid);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6fe24964540b..cd679cf1fd30 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -24,6 +24,7 @@
24#include <linux/export.h> 24#include <linux/export.h>
25#include <linux/err.h> 25#include <linux/err.h>
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/file.h>
27#include <linux/sched.h> 28#include <linux/sched.h>
28#include <linux/slab.h> 29#include <linux/slab.h>
29#include <linux/uaccess.h> 30#include <linux/uaccess.h>
@@ -825,12 +826,155 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
825 return 0; 826 return 0;
826} 827}
827 828
829static int kfd_ioctl_get_process_apertures_new(struct file *filp,
830 struct kfd_process *p, void *data)
831{
832 struct kfd_ioctl_get_process_apertures_new_args *args = data;
833 struct kfd_process_device_apertures *pa;
834 struct kfd_process_device *pdd;
835 uint32_t nodes = 0;
836 int ret;
837
838 dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
839
840 if (args->num_of_nodes == 0) {
841 /* Return number of nodes, so that user space can alloacate
842 * sufficient memory
843 */
844 mutex_lock(&p->mutex);
845
846 if (!kfd_has_process_device_data(p))
847 goto out_unlock;
848
849 /* Run over all pdd of the process */
850 pdd = kfd_get_first_process_device_data(p);
851 do {
852 args->num_of_nodes++;
853 pdd = kfd_get_next_process_device_data(p, pdd);
854 } while (pdd);
855
856 goto out_unlock;
857 }
858
859 /* Fill in process-aperture information for all available
860 * nodes, but not more than args->num_of_nodes as that is
861 * the amount of memory allocated by user
862 */
863 pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
864 args->num_of_nodes), GFP_KERNEL);
865 if (!pa)
866 return -ENOMEM;
867
868 mutex_lock(&p->mutex);
869
870 if (!kfd_has_process_device_data(p)) {
871 args->num_of_nodes = 0;
872 kfree(pa);
873 goto out_unlock;
874 }
875
876 /* Run over all pdd of the process */
877 pdd = kfd_get_first_process_device_data(p);
878 do {
879 pa[nodes].gpu_id = pdd->dev->id;
880 pa[nodes].lds_base = pdd->lds_base;
881 pa[nodes].lds_limit = pdd->lds_limit;
882 pa[nodes].gpuvm_base = pdd->gpuvm_base;
883 pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
884 pa[nodes].scratch_base = pdd->scratch_base;
885 pa[nodes].scratch_limit = pdd->scratch_limit;
886
887 dev_dbg(kfd_device,
888 "gpu id %u\n", pdd->dev->id);
889 dev_dbg(kfd_device,
890 "lds_base %llX\n", pdd->lds_base);
891 dev_dbg(kfd_device,
892 "lds_limit %llX\n", pdd->lds_limit);
893 dev_dbg(kfd_device,
894 "gpuvm_base %llX\n", pdd->gpuvm_base);
895 dev_dbg(kfd_device,
896 "gpuvm_limit %llX\n", pdd->gpuvm_limit);
897 dev_dbg(kfd_device,
898 "scratch_base %llX\n", pdd->scratch_base);
899 dev_dbg(kfd_device,
900 "scratch_limit %llX\n", pdd->scratch_limit);
901 nodes++;
902
903 pdd = kfd_get_next_process_device_data(p, pdd);
904 } while (pdd && (nodes < args->num_of_nodes));
905 mutex_unlock(&p->mutex);
906
907 args->num_of_nodes = nodes;
908 ret = copy_to_user(
909 (void __user *)args->kfd_process_device_apertures_ptr,
910 pa,
911 (nodes * sizeof(struct kfd_process_device_apertures)));
912 kfree(pa);
913 return ret ? -EFAULT : 0;
914
915out_unlock:
916 mutex_unlock(&p->mutex);
917 return 0;
918}
919
828static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, 920static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
829 void *data) 921 void *data)
830{ 922{
831 struct kfd_ioctl_create_event_args *args = data; 923 struct kfd_ioctl_create_event_args *args = data;
832 int err; 924 int err;
833 925
926 /* For dGPUs the event page is allocated in user mode. The
927 * handle is passed to KFD with the first call to this IOCTL
928 * through the event_page_offset field.
929 */
930 if (args->event_page_offset) {
931 struct kfd_dev *kfd;
932 struct kfd_process_device *pdd;
933 void *mem, *kern_addr;
934 uint64_t size;
935
936 if (p->signal_page) {
937 pr_err("Event page is already set\n");
938 return -EINVAL;
939 }
940
941 kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
942 if (!kfd) {
943 pr_err("Getting device by id failed in %s\n", __func__);
944 return -EINVAL;
945 }
946
947 mutex_lock(&p->mutex);
948 pdd = kfd_bind_process_to_device(kfd, p);
949 if (IS_ERR(pdd)) {
950 err = PTR_ERR(pdd);
951 goto out_unlock;
952 }
953
954 mem = kfd_process_device_translate_handle(pdd,
955 GET_IDR_HANDLE(args->event_page_offset));
956 if (!mem) {
957 pr_err("Can't find BO, offset is 0x%llx\n",
958 args->event_page_offset);
959 err = -EINVAL;
960 goto out_unlock;
961 }
962 mutex_unlock(&p->mutex);
963
964 err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
965 mem, &kern_addr, &size);
966 if (err) {
967 pr_err("Failed to map event page to kernel\n");
968 return err;
969 }
970
971 err = kfd_event_page_set(p, kern_addr, size);
972 if (err) {
973 pr_err("Failed to set event page\n");
974 return err;
975 }
976 }
977
834 err = kfd_event_create(filp, p, args->event_type, 978 err = kfd_event_create(filp, p, args->event_type,
835 args->auto_reset != 0, args->node_id, 979 args->auto_reset != 0, args->node_id,
836 &args->event_id, &args->event_trigger_data, 980 &args->event_id, &args->event_trigger_data,
@@ -838,6 +982,10 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
838 &args->event_slot_index); 982 &args->event_slot_index);
839 983
840 return err; 984 return err;
985
986out_unlock:
987 mutex_unlock(&p->mutex);
988 return err;
841} 989}
842 990
843static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p, 991static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
@@ -955,6 +1103,371 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
955 return 0; 1103 return 0;
956} 1104}
957 1105
1106static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1107 void *data)
1108{
1109 struct kfd_ioctl_acquire_vm_args *args = data;
1110 struct kfd_process_device *pdd;
1111 struct kfd_dev *dev;
1112 struct file *drm_file;
1113 int ret;
1114
1115 dev = kfd_device_by_id(args->gpu_id);
1116 if (!dev)
1117 return -EINVAL;
1118
1119 drm_file = fget(args->drm_fd);
1120 if (!drm_file)
1121 return -EINVAL;
1122
1123 mutex_lock(&p->mutex);
1124
1125 pdd = kfd_get_process_device_data(dev, p);
1126 if (!pdd) {
1127 ret = -EINVAL;
1128 goto err_unlock;
1129 }
1130
1131 if (pdd->drm_file) {
1132 ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1133 goto err_unlock;
1134 }
1135
1136 ret = kfd_process_device_init_vm(pdd, drm_file);
1137 if (ret)
1138 goto err_unlock;
1139 /* On success, the PDD keeps the drm_file reference */
1140 mutex_unlock(&p->mutex);
1141
1142 return 0;
1143
1144err_unlock:
1145 mutex_unlock(&p->mutex);
1146 fput(drm_file);
1147 return ret;
1148}
1149
1150bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1151{
1152 struct kfd_local_mem_info mem_info;
1153
1154 if (debug_largebar) {
1155 pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1156 return true;
1157 }
1158
1159 if (dev->device_info->needs_iommu_device)
1160 return false;
1161
1162 dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
1163 if (mem_info.local_mem_size_private == 0 &&
1164 mem_info.local_mem_size_public > 0)
1165 return true;
1166 return false;
1167}
1168
1169static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1170 struct kfd_process *p, void *data)
1171{
1172 struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1173 struct kfd_process_device *pdd;
1174 void *mem;
1175 struct kfd_dev *dev;
1176 int idr_handle;
1177 long err;
1178 uint64_t offset = args->mmap_offset;
1179 uint32_t flags = args->flags;
1180
1181 if (args->size == 0)
1182 return -EINVAL;
1183
1184 dev = kfd_device_by_id(args->gpu_id);
1185 if (!dev)
1186 return -EINVAL;
1187
1188 if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1189 (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1190 !kfd_dev_is_large_bar(dev)) {
1191 pr_err("Alloc host visible vram on small bar is not allowed\n");
1192 return -EINVAL;
1193 }
1194
1195 mutex_lock(&p->mutex);
1196
1197 pdd = kfd_bind_process_to_device(dev, p);
1198 if (IS_ERR(pdd)) {
1199 err = PTR_ERR(pdd);
1200 goto err_unlock;
1201 }
1202
1203 err = dev->kfd2kgd->alloc_memory_of_gpu(
1204 dev->kgd, args->va_addr, args->size,
1205 pdd->vm, (struct kgd_mem **) &mem, &offset,
1206 flags);
1207
1208 if (err)
1209 goto err_unlock;
1210
1211 idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1212 if (idr_handle < 0) {
1213 err = -EFAULT;
1214 goto err_free;
1215 }
1216
1217 mutex_unlock(&p->mutex);
1218
1219 args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1220 args->mmap_offset = offset;
1221
1222 return 0;
1223
1224err_free:
1225 dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
1226err_unlock:
1227 mutex_unlock(&p->mutex);
1228 return err;
1229}
1230
1231static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1232 struct kfd_process *p, void *data)
1233{
1234 struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1235 struct kfd_process_device *pdd;
1236 void *mem;
1237 struct kfd_dev *dev;
1238 int ret;
1239
1240 dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1241 if (!dev)
1242 return -EINVAL;
1243
1244 mutex_lock(&p->mutex);
1245
1246 pdd = kfd_get_process_device_data(dev, p);
1247 if (!pdd) {
1248 pr_err("Process device data doesn't exist\n");
1249 ret = -EINVAL;
1250 goto err_unlock;
1251 }
1252
1253 mem = kfd_process_device_translate_handle(
1254 pdd, GET_IDR_HANDLE(args->handle));
1255 if (!mem) {
1256 ret = -EINVAL;
1257 goto err_unlock;
1258 }
1259
1260 ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
1261
1262 /* If freeing the buffer failed, leave the handle in place for
1263 * clean-up during process tear-down.
1264 */
1265 if (!ret)
1266 kfd_process_device_remove_obj_handle(
1267 pdd, GET_IDR_HANDLE(args->handle));
1268
1269err_unlock:
1270 mutex_unlock(&p->mutex);
1271 return ret;
1272}
1273
1274static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1275 struct kfd_process *p, void *data)
1276{
1277 struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1278 struct kfd_process_device *pdd, *peer_pdd;
1279 void *mem;
1280 struct kfd_dev *dev, *peer;
1281 long err = 0;
1282 int i;
1283 uint32_t *devices_arr = NULL;
1284
1285 dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1286 if (!dev)
1287 return -EINVAL;
1288
1289 if (!args->n_devices) {
1290 pr_debug("Device IDs array empty\n");
1291 return -EINVAL;
1292 }
1293 if (args->n_success > args->n_devices) {
1294 pr_debug("n_success exceeds n_devices\n");
1295 return -EINVAL;
1296 }
1297
1298 devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
1299 GFP_KERNEL);
1300 if (!devices_arr)
1301 return -ENOMEM;
1302
1303 err = copy_from_user(devices_arr,
1304 (void __user *)args->device_ids_array_ptr,
1305 args->n_devices * sizeof(*devices_arr));
1306 if (err != 0) {
1307 err = -EFAULT;
1308 goto copy_from_user_failed;
1309 }
1310
1311 mutex_lock(&p->mutex);
1312
1313 pdd = kfd_bind_process_to_device(dev, p);
1314 if (IS_ERR(pdd)) {
1315 err = PTR_ERR(pdd);
1316 goto bind_process_to_device_failed;
1317 }
1318
1319 mem = kfd_process_device_translate_handle(pdd,
1320 GET_IDR_HANDLE(args->handle));
1321 if (!mem) {
1322 err = -ENOMEM;
1323 goto get_mem_obj_from_handle_failed;
1324 }
1325
1326 for (i = args->n_success; i < args->n_devices; i++) {
1327 peer = kfd_device_by_id(devices_arr[i]);
1328 if (!peer) {
1329 pr_debug("Getting device by id failed for 0x%x\n",
1330 devices_arr[i]);
1331 err = -EINVAL;
1332 goto get_mem_obj_from_handle_failed;
1333 }
1334
1335 peer_pdd = kfd_bind_process_to_device(peer, p);
1336 if (IS_ERR(peer_pdd)) {
1337 err = PTR_ERR(peer_pdd);
1338 goto get_mem_obj_from_handle_failed;
1339 }
1340 err = peer->kfd2kgd->map_memory_to_gpu(
1341 peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1342 if (err) {
1343 pr_err("Failed to map to gpu %d/%d\n",
1344 i, args->n_devices);
1345 goto map_memory_to_gpu_failed;
1346 }
1347 args->n_success = i+1;
1348 }
1349
1350 mutex_unlock(&p->mutex);
1351
1352 err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1353 if (err) {
1354 pr_debug("Sync memory failed, wait interrupted by user signal\n");
1355 goto sync_memory_failed;
1356 }
1357
1358 /* Flush TLBs after waiting for the page table updates to complete */
1359 for (i = 0; i < args->n_devices; i++) {
1360 peer = kfd_device_by_id(devices_arr[i]);
1361 if (WARN_ON_ONCE(!peer))
1362 continue;
1363 peer_pdd = kfd_get_process_device_data(peer, p);
1364 if (WARN_ON_ONCE(!peer_pdd))
1365 continue;
1366 kfd_flush_tlb(peer_pdd);
1367 }
1368
1369 kfree(devices_arr);
1370
1371 return err;
1372
1373bind_process_to_device_failed:
1374get_mem_obj_from_handle_failed:
1375map_memory_to_gpu_failed:
1376 mutex_unlock(&p->mutex);
1377copy_from_user_failed:
1378sync_memory_failed:
1379 kfree(devices_arr);
1380
1381 return err;
1382}
1383
1384static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1385 struct kfd_process *p, void *data)
1386{
1387 struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1388 struct kfd_process_device *pdd, *peer_pdd;
1389 void *mem;
1390 struct kfd_dev *dev, *peer;
1391 long err = 0;
1392 uint32_t *devices_arr = NULL, i;
1393
1394 dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1395 if (!dev)
1396 return -EINVAL;
1397
1398 if (!args->n_devices) {
1399 pr_debug("Device IDs array empty\n");
1400 return -EINVAL;
1401 }
1402 if (args->n_success > args->n_devices) {
1403 pr_debug("n_success exceeds n_devices\n");
1404 return -EINVAL;
1405 }
1406
1407 devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
1408 GFP_KERNEL);
1409 if (!devices_arr)
1410 return -ENOMEM;
1411
1412 err = copy_from_user(devices_arr,
1413 (void __user *)args->device_ids_array_ptr,
1414 args->n_devices * sizeof(*devices_arr));
1415 if (err != 0) {
1416 err = -EFAULT;
1417 goto copy_from_user_failed;
1418 }
1419
1420 mutex_lock(&p->mutex);
1421
1422 pdd = kfd_get_process_device_data(dev, p);
1423 if (!pdd) {
1424 err = PTR_ERR(pdd);
1425 goto bind_process_to_device_failed;
1426 }
1427
1428 mem = kfd_process_device_translate_handle(pdd,
1429 GET_IDR_HANDLE(args->handle));
1430 if (!mem) {
1431 err = -ENOMEM;
1432 goto get_mem_obj_from_handle_failed;
1433 }
1434
1435 for (i = args->n_success; i < args->n_devices; i++) {
1436 peer = kfd_device_by_id(devices_arr[i]);
1437 if (!peer) {
1438 err = -EINVAL;
1439 goto get_mem_obj_from_handle_failed;
1440 }
1441
1442 peer_pdd = kfd_get_process_device_data(peer, p);
1443 if (!peer_pdd) {
1444 err = -ENODEV;
1445 goto get_mem_obj_from_handle_failed;
1446 }
1447 err = dev->kfd2kgd->unmap_memory_to_gpu(
1448 peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1449 if (err) {
1450 pr_err("Failed to unmap from gpu %d/%d\n",
1451 i, args->n_devices);
1452 goto unmap_memory_from_gpu_failed;
1453 }
1454 args->n_success = i+1;
1455 }
1456 kfree(devices_arr);
1457
1458 mutex_unlock(&p->mutex);
1459
1460 return 0;
1461
1462bind_process_to_device_failed:
1463get_mem_obj_from_handle_failed:
1464unmap_memory_from_gpu_failed:
1465 mutex_unlock(&p->mutex);
1466copy_from_user_failed:
1467 kfree(devices_arr);
1468 return err;
1469}
1470
958#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ 1471#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
959 [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ 1472 [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
960 .cmd_drv = 0, .name = #ioctl} 1473 .cmd_drv = 0, .name = #ioctl}
@@ -1017,6 +1530,25 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1017 1530
1018 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER, 1531 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1019 kfd_ioctl_set_trap_handler, 0), 1532 kfd_ioctl_set_trap_handler, 0),
1533
1534 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1535 kfd_ioctl_get_process_apertures_new, 0),
1536
1537 AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1538 kfd_ioctl_acquire_vm, 0),
1539
1540 AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1541 kfd_ioctl_alloc_memory_of_gpu, 0),
1542
1543 AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1544 kfd_ioctl_free_memory_of_gpu, 0),
1545
1546 AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1547 kfd_ioctl_map_memory_to_gpu, 0),
1548
1549 AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1550 kfd_ioctl_unmap_memory_from_gpu, 0),
1551
1020}; 1552};
1021 1553
1022#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) 1554#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
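
The kfd_ioctl_create_event() change above only covers the kernel side: for dGPUs the signal page now comes from user space and is handed over on the first AMDKFD_IOC_CREATE_EVENT call through event_page_offset. The sketch below shows the matching user-space steps under stated assumptions: KFD_IOC_ALLOC_MEM_FLAGS_GTT, KFD_IOC_EVENT_SIGNAL and the page size are names and values not shown on this page, and the handle-in-event_page_offset convention is inferred from the GET_GPU_ID()/GET_IDR_HANDLE() decoding in the hunk above.

/*
 * Hypothetical user-space sketch: setting up the dGPU event page. The
 * kernel expects event_page_offset to carry the handle returned by
 * AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, which it then kmaps and installs via
 * kfd_event_page_set().
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

#define EVENT_PAGE_SIZE (4096 * 8)	/* assumed: 4096 event slots * 8 bytes */

int example_setup_event_page(int kfd_fd, uint32_t gpu_id, uint64_t va)
{
	/* 1. Allocate the signal page as a GTT BO on the dGPU */
	struct kfd_ioctl_alloc_memory_of_gpu_args alloc = {
		.va_addr = va,
		.size = EVENT_PAGE_SIZE,
		.gpu_id = gpu_id,
		.flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT,	/* assumed flag name */
	};
	if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &alloc))
		return -1;

	/* 2. First CREATE_EVENT call hands the BO handle to KFD, which maps
	 *    it into kernel space as the process signal page.
	 */
	struct kfd_ioctl_create_event_args ev = {
		.event_page_offset = alloc.handle,
		.event_type = KFD_IOC_EVENT_SIGNAL,	/* assumed type name */
		.auto_reset = 1,
	};
	if (ioctl(kfd_fd, AMDKFD_IOC_CREATE_EVENT, &ev))
		return -1;

	/* ev.event_id / ev.event_slot_index identify the slot in the page */
	return (int)ev.event_id;
}
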
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 7493f47e7fe1..4f126ef6139b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -882,7 +882,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
882 crat_table->length = sizeof(struct crat_header); 882 crat_table->length = sizeof(struct crat_header);
883 883
884 status = acpi_get_table("DSDT", 0, &acpi_table); 884 status = acpi_get_table("DSDT", 0, &acpi_table);
885 if (status == AE_NOT_FOUND) 885 if (status != AE_OK)
886 pr_warn("DSDT table not found for OEM information\n"); 886 pr_warn("DSDT table not found for OEM information\n");
887 else { 887 else {
888 crat_table->oem_revision = acpi_table->revision; 888 crat_table->oem_revision = acpi_table->revision;
@@ -1117,6 +1117,9 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
1117 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 1117 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
1118 sub_type_hdr->length); 1118 sub_type_hdr->length);
1119 1119
1120 if (debug_largebar)
1121 local_mem_info.local_mem_size_private = 0;
1122
1120 if (local_mem_info.local_mem_size_private == 0) 1123 if (local_mem_info.local_mem_size_private == 0)
1121 ret = kfd_fill_gpu_memory_affinity(&avail_size, 1124 ret = kfd_fill_gpu_memory_affinity(&avail_size,
1122 kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC, 1125 kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b3b6dab71638..c18e048f23c6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -142,12 +142,31 @@ static int allocate_vmid(struct device_queue_manager *dqm,
142 return 0; 142 return 0;
143} 143}
144 144
145static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
146 struct qcm_process_device *qpd)
147{
148 uint32_t len;
149
150 if (!qpd->ib_kaddr)
151 return -ENOMEM;
152
153 len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
154
155 return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
156 qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
157}
158
145static void deallocate_vmid(struct device_queue_manager *dqm, 159static void deallocate_vmid(struct device_queue_manager *dqm,
146 struct qcm_process_device *qpd, 160 struct qcm_process_device *qpd,
147 struct queue *q) 161 struct queue *q)
148{ 162{
149 int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; 163 int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
150 164
165 /* On GFX v7, CP doesn't flush TC at dequeue */
166 if (q->device->device_info->asic_family == CHIP_HAWAII)
167 if (flush_texture_cache_nocpsch(q->device, qpd))
168 pr_err("Failed to flush TC\n");
169
151 kfd_flush_tlb(qpd_to_pdd(qpd)); 170 kfd_flush_tlb(qpd_to_pdd(qpd));
152 171
153 /* Release the vmid mapping */ 172 /* Release the vmid mapping */
@@ -792,11 +811,12 @@ static void uninitialize(struct device_queue_manager *dqm)
792static int start_nocpsch(struct device_queue_manager *dqm) 811static int start_nocpsch(struct device_queue_manager *dqm)
793{ 812{
794 init_interrupts(dqm); 813 init_interrupts(dqm);
795 return 0; 814 return pm_init(&dqm->packets, dqm);
796} 815}
797 816
798static int stop_nocpsch(struct device_queue_manager *dqm) 817static int stop_nocpsch(struct device_queue_manager *dqm)
799{ 818{
819 pm_uninit(&dqm->packets);
800 return 0; 820 return 0;
801} 821}
802 822
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 6fb9c0d46d63..4890a90f1e44 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -52,6 +52,7 @@ struct kfd_event_waiter {
52struct kfd_signal_page { 52struct kfd_signal_page {
53 uint64_t *kernel_address; 53 uint64_t *kernel_address;
54 uint64_t __user *user_address; 54 uint64_t __user *user_address;
55 bool need_to_free_pages;
55}; 56};
56 57
57 58
@@ -79,6 +80,7 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
79 KFD_SIGNAL_EVENT_LIMIT * 8); 80 KFD_SIGNAL_EVENT_LIMIT * 8);
80 81
81 page->kernel_address = backing_store; 82 page->kernel_address = backing_store;
83 page->need_to_free_pages = true;
82 pr_debug("Allocated new event signal page at %p, for process %p\n", 84 pr_debug("Allocated new event signal page at %p, for process %p\n",
83 page, p); 85 page, p);
84 86
@@ -269,8 +271,9 @@ static void shutdown_signal_page(struct kfd_process *p)
269 struct kfd_signal_page *page = p->signal_page; 271 struct kfd_signal_page *page = p->signal_page;
270 272
271 if (page) { 273 if (page) {
272 free_pages((unsigned long)page->kernel_address, 274 if (page->need_to_free_pages)
273 get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); 275 free_pages((unsigned long)page->kernel_address,
276 get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
274 kfree(page); 277 kfree(page);
275 } 278 }
276} 279}
@@ -292,6 +295,30 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
292 return ev->type == KFD_EVENT_TYPE_SIGNAL; 295 return ev->type == KFD_EVENT_TYPE_SIGNAL;
293} 296}
294 297
298int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
299 uint64_t size)
300{
301 struct kfd_signal_page *page;
302
303 if (p->signal_page)
304 return -EBUSY;
305
306 page = kzalloc(sizeof(*page), GFP_KERNEL);
307 if (!page)
308 return -ENOMEM;
309
310 /* Initialize all events to unsignaled */
311 memset(kernel_address, (uint8_t) UNSIGNALED_EVENT_SLOT,
312 KFD_SIGNAL_EVENT_LIMIT * 8);
313
314 page->kernel_address = kernel_address;
315
316 p->signal_page = page;
317 p->signal_mapped_size = size;
318
319 return 0;
320}
321
295int kfd_event_create(struct file *devkfd, struct kfd_process *p, 322int kfd_event_create(struct file *devkfd, struct kfd_process *p,
296 uint32_t event_type, bool auto_reset, uint32_t node_id, 323 uint32_t event_type, bool auto_reset, uint32_t node_id,
297 uint32_t *event_id, uint32_t *event_trigger_data, 324 uint32_t *event_id, uint32_t *event_trigger_data,
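
The kfd_events.c change above lets a dGPU process provide its own signal page: kfd_event_page_set() adopts a kernel mapping of a buffer the process allocated, and the new need_to_free_pages flag keeps shutdown_signal_page() from freeing memory the event code does not own. Below is a minimal kernel-style sketch of how such a page could be wired up; the wrapper function is illustrative only (the real plumbing lives in the kfd_chardev.c changes, which are not quoted in this excerpt), while every helper it calls appears elsewhere in this series.

/* Illustrative sketch (assumes kfd_priv.h context; not compilable
 * standalone): adopt a user-allocated GTT buffer as the process
 * signal page on a dGPU.
 */
static int example_set_dgpu_event_page(struct kfd_process *p,
                                       struct kfd_process_device *pdd,
                                       uint64_t handle, uint64_t size)
{
        struct kfd_dev *kdev = pdd->dev;
        void *mem, *kern_addr;
        int err;

        /* Look up the buffer the process allocated earlier */
        mem = kfd_process_device_translate_handle(pdd,
                        GET_IDR_HANDLE(handle));
        if (!mem)
                return -EINVAL;

        /* Kmap the GTT BO so the kernel can read the signal slots */
        err = kdev->kfd2kgd->map_gtt_bo_to_kernel(kdev->kgd,
                        (struct kgd_mem *)mem, &kern_addr, NULL);
        if (err)
                return err;

        /* Hand the mapping to the event code; it will not free it */
        return kfd_event_page_set(p, kern_addr, size);
}
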
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 7377513050e6..66852de410c8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -278,21 +278,28 @@
278#define MAKE_GPUVM_APP_BASE(gpu_num) \ 278#define MAKE_GPUVM_APP_BASE(gpu_num) \
279 (((uint64_t)(gpu_num) << 61) + 0x1000000000000L) 279 (((uint64_t)(gpu_num) << 61) + 0x1000000000000L)
280 280
281#define MAKE_GPUVM_APP_LIMIT(base) \ 281#define MAKE_GPUVM_APP_LIMIT(base, size) \
282 (((uint64_t)(base) & \ 282 (((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1)
283 0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL)
284 283
285#define MAKE_SCRATCH_APP_BASE(gpu_num) \ 284#define MAKE_SCRATCH_APP_BASE() \
286 (((uint64_t)(gpu_num) << 61) + 0x100000000L) 285 (((uint64_t)(0x1UL) << 61) + 0x100000000L)
287 286
288#define MAKE_SCRATCH_APP_LIMIT(base) \ 287#define MAKE_SCRATCH_APP_LIMIT(base) \
289 (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) 288 (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
290 289
291#define MAKE_LDS_APP_BASE(gpu_num) \ 290#define MAKE_LDS_APP_BASE() \
292 (((uint64_t)(gpu_num) << 61) + 0x0) 291 (((uint64_t)(0x1UL) << 61) + 0x0)
293#define MAKE_LDS_APP_LIMIT(base) \ 292#define MAKE_LDS_APP_LIMIT(base) \
294 (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) 293 (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
295 294
295/* User mode manages most of the SVM aperture address space. The low
296 * 16MB are reserved for kernel use (CWSR trap handler and kernel IB
297 * for now).
298 */
299#define SVM_USER_BASE 0x1000000ull
300#define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE)
301#define SVM_IB_BASE (SVM_CWSR_BASE - PAGE_SIZE)
302
296int kfd_init_apertures(struct kfd_process *process) 303int kfd_init_apertures(struct kfd_process *process)
297{ 304{
298 uint8_t id = 0; 305 uint8_t id = 0;
@@ -314,7 +321,7 @@ int kfd_init_apertures(struct kfd_process *process)
314 return -1; 321 return -1;
315 } 322 }
316 /* 323 /*
317 * For 64 bit process aperture will be statically reserved in 324 * For 64 bit process apertures will be statically reserved in
318 * the x86_64 non canonical process address space 325 * the x86_64 non canonical process address space
319 * amdkfd doesn't currently support apertures for 32 bit process 326 * amdkfd doesn't currently support apertures for 32 bit process
320 */ 327 */
@@ -323,23 +330,35 @@ int kfd_init_apertures(struct kfd_process *process)
323 pdd->gpuvm_base = pdd->gpuvm_limit = 0; 330 pdd->gpuvm_base = pdd->gpuvm_limit = 0;
324 pdd->scratch_base = pdd->scratch_limit = 0; 331 pdd->scratch_base = pdd->scratch_limit = 0;
325 } else { 332 } else {
326 /* 333 /* Same LDS and scratch apertures can be used
327 * node id couldn't be 0 - the three MSB bits of 334 * on all GPUs. This allows using more dGPUs
328 * aperture shoudn't be 0 335 * than placement options for apertures.
329 */ 336 */
330 pdd->lds_base = MAKE_LDS_APP_BASE(id + 1); 337 pdd->lds_base = MAKE_LDS_APP_BASE();
331
332 pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); 338 pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
333 339
334 pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1); 340 pdd->scratch_base = MAKE_SCRATCH_APP_BASE();
335
336 pdd->gpuvm_limit =
337 MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base);
338
339 pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1);
340
341 pdd->scratch_limit = 341 pdd->scratch_limit =
342 MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); 342 MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
343
344 if (dev->device_info->needs_iommu_device) {
345 /* APUs: GPUVM aperture in
346 * non-canonical address space
347 */
348 pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
349 pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(
350 pdd->gpuvm_base,
351 dev->shared_resources.gpuvm_size);
352 } else {
353 /* dGPUs: SVM aperture starting at 0
354 * with small reserved space for kernel
355 */
356 pdd->gpuvm_base = SVM_USER_BASE;
357 pdd->gpuvm_limit =
358 dev->shared_resources.gpuvm_size - 1;
359 pdd->qpd.cwsr_base = SVM_CWSR_BASE;
360 pdd->qpd.ib_base = SVM_IB_BASE;
361 }
343 } 362 }
344 363
345 dev_dbg(kfd_device, "node id %u\n", id); 364 dev_dbg(kfd_device, "node id %u\n", id);
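
For dGPUs, the kfd_flat_memory.c hunk above replaces the per-GPU GPUVM aperture with a single SVM aperture whose low 16 MB are reserved for kernel use (the CWSR trap handler and a kernel IB page). A small standalone sketch of the resulting layout follows; PAGE_SIZE and KFD_CWSR_TBA_TMA_SIZE are assumed to be 4 KiB and two pages here, since their definitions are not part of the quoted hunks.

#include <stdint.h>
#include <stdio.h>

/* Assumed values - not taken from the quoted diff */
#define PAGE_SIZE             4096ULL
#define KFD_CWSR_TBA_TMA_SIZE (2 * PAGE_SIZE)

/* Mirrors the new kfd_flat_memory.c definitions */
#define SVM_USER_BASE 0x1000000ULL                        /* 16 MiB */
#define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE)
#define SVM_IB_BASE   (SVM_CWSR_BASE - PAGE_SIZE)

int main(void)
{
        /* gpuvm_size would come from dev->shared_resources; assume 1 TiB */
        uint64_t gpuvm_size = 1ULL << 40;

        printf("SVM_IB_BASE   = 0x%llx\n", (unsigned long long)SVM_IB_BASE);
        printf("SVM_CWSR_BASE = 0x%llx\n", (unsigned long long)SVM_CWSR_BASE);
        printf("gpuvm_base    = 0x%llx\n", (unsigned long long)SVM_USER_BASE);
        printf("gpuvm_limit   = 0x%llx\n", (unsigned long long)(gpuvm_size - 1));
        return 0;
}

With these assumptions the kernel IB page sits at 0xffd000, the CWSR TBA/TMA pages at 0xffe000, and user-mode SVM allocations start at 0x1000000, preserving the ordering SVM_IB_BASE < SVM_CWSR_BASE < SVM_USER_BASE established by the macros.
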
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 65574c6a10b3..e0c07d24d251 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -71,6 +71,11 @@ module_param(send_sigterm, int, 0444);
71MODULE_PARM_DESC(send_sigterm, 71MODULE_PARM_DESC(send_sigterm,
72 "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)"); 72 "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
73 73
74int debug_largebar;
75module_param(debug_largebar, int, 0444);
76MODULE_PARM_DESC(debug_largebar,
77 "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
78
74int ignore_crat; 79int ignore_crat;
75module_param(ignore_crat, int, 0444); 80module_param(ignore_crat, int, 0444);
76MODULE_PARM_DESC(ignore_crat, 81MODULE_PARM_DESC(ignore_crat,
@@ -128,7 +133,9 @@ static int __init kfd_module_init(void)
128 if (err < 0) 133 if (err < 0)
129 goto err_topology; 134 goto err_topology;
130 135
131 kfd_process_create_wq(); 136 err = kfd_process_create_wq();
137 if (err < 0)
138 goto err_create_wq;
132 139
133 kfd_debugfs_init(); 140 kfd_debugfs_init();
134 141
@@ -138,6 +145,8 @@ static int __init kfd_module_init(void)
138 145
139 return 0; 146 return 0;
140 147
148err_create_wq:
149 kfd_topology_shutdown();
141err_topology: 150err_topology:
142 kfd_chardev_exit(); 151 kfd_chardev_exit();
143err_ioctl: 152err_ioctl:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 0ecbd1f9b606..7614375489a4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -356,6 +356,43 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
356 return retval; 356 return retval;
357} 357}
358 358
359/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
360 * of this packet
361 * @gpu_addr - GPU address of the packet. It's a virtual address.
362 * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
363 * Return - length of the packet
364 */
365uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
366{
367 struct pm4_mec_release_mem *packet;
368
369 WARN_ON(!buffer);
370
371 packet = (struct pm4_mec_release_mem *)buffer;
372 memset(buffer, 0, sizeof(*packet));
373
374 packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
375 sizeof(*packet));
376
377 packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
378 packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
379 packet->bitfields2.tcl1_action_ena = 1;
380 packet->bitfields2.tc_action_ena = 1;
381 packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
382 packet->bitfields2.atc = 0;
383
384 packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
385 packet->bitfields3.int_sel =
386 int_sel___release_mem__send_interrupt_after_write_confirm;
387
388 packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
389 packet->address_hi = upper_32_bits(gpu_addr);
390
391 packet->data_lo = 0;
392
393 return sizeof(*packet) / sizeof(unsigned int);
394}
395
359int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) 396int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
360{ 397{
361 pm->dqm = dqm; 398 pm->dqm = dqm;
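
pm_create_release_mem() above stores the IB's GPU virtual address with the low 32 bits shifted right by two (the packet field holds a dword-aligned address) and returns the packet length in dwords for kfd2kgd->submit_ib(). A short worked example of the address split, using an arbitrary sample address:

#include <stdint.h>
#include <stdio.h>

/* upper_32_bits() equivalent for this userspace sketch */
static uint32_t upper_32_bits(uint64_t v) { return (uint32_t)(v >> 32); }

int main(void)
{
        uint64_t gpu_addr = 0x100FFD000ULL;     /* arbitrary dword-aligned VA */

        /* Same split as pm_create_release_mem() */
        uint32_t address_lo_32b = (uint32_t)((gpu_addr & 0xffffffff) >> 2);
        uint32_t address_hi     = upper_32_bits(gpu_addr);

        printf("address_lo_32b = 0x%08x\n", address_lo_32b);  /* 0x003ff400 */
        printf("address_hi     = 0x%08x\n", address_hi);      /* 0x00000001 */
        return 0;
}
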
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index cac7aa258162..96a9cc0f02c9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -105,6 +105,12 @@ extern int cwsr_enable;
105extern int send_sigterm; 105extern int send_sigterm;
106 106
107/* 107/*
108 * This kernel module is used to simulate large bar machine on non-large bar
109 * enabled machines.
110 */
111extern int debug_largebar;
112
113/*
108 * Ignore CRAT table during KFD initialization, can be used to work around 114 * Ignore CRAT table during KFD initialization, can be used to work around
109 * broken CRAT tables on some AMD systems 115 * broken CRAT tables on some AMD systems
110 */ 116 */
@@ -488,8 +494,13 @@ struct qcm_process_device {
488 494
489 /* CWSR memory */ 495 /* CWSR memory */
490 void *cwsr_kaddr; 496 void *cwsr_kaddr;
497 uint64_t cwsr_base;
491 uint64_t tba_addr; 498 uint64_t tba_addr;
492 uint64_t tma_addr; 499 uint64_t tma_addr;
500
501 /* IB memory */
502 uint64_t ib_base;
503 void *ib_kaddr;
493}; 504};
494 505
495/* KFD Memory Eviction */ 506/* KFD Memory Eviction */
@@ -504,6 +515,14 @@ struct qcm_process_device {
504int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, 515int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
505 struct dma_fence *fence); 516 struct dma_fence *fence);
506 517
518/* 8 byte handle containing GPU ID in the most significant 4 bytes and
519 * idr_handle in the least significant 4 bytes
520 */
521#define MAKE_HANDLE(gpu_id, idr_handle) \
522 (((uint64_t)(gpu_id) << 32) + idr_handle)
523#define GET_GPU_ID(handle) (handle >> 32)
524#define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF)
525
507enum kfd_pdd_bound { 526enum kfd_pdd_bound {
508 PDD_UNBOUND = 0, 527 PDD_UNBOUND = 0,
509 PDD_BOUND, 528 PDD_BOUND,
@@ -536,8 +555,12 @@ struct kfd_process_device {
536 uint64_t scratch_limit; 555 uint64_t scratch_limit;
537 556
538 /* VM context for GPUVM allocations */ 557 /* VM context for GPUVM allocations */
558 struct file *drm_file;
539 void *vm; 559 void *vm;
540 560
561 /* GPUVM allocations storage */
562 struct idr alloc_idr;
563
541 /* Flag used to tell the pdd has dequeued from the dqm. 564 /* Flag used to tell the pdd has dequeued from the dqm.
542 * This is used to prevent dev->dqm->ops.process_termination() from 565 * This is used to prevent dev->dqm->ops.process_termination() from
543 * being called twice when it is already called in IOMMU callback 566 * being called twice when it is already called in IOMMU callback
@@ -651,7 +674,7 @@ struct amdkfd_ioctl_desc {
651 const char *name; 674 const char *name;
652}; 675};
653 676
654void kfd_process_create_wq(void); 677int kfd_process_create_wq(void);
655void kfd_process_destroy_wq(void); 678void kfd_process_destroy_wq(void);
656struct kfd_process *kfd_create_process(struct file *filep); 679struct kfd_process *kfd_create_process(struct file *filep);
657struct kfd_process *kfd_get_process(const struct task_struct *); 680struct kfd_process *kfd_get_process(const struct task_struct *);
@@ -661,6 +684,8 @@ void kfd_unref_process(struct kfd_process *p);
661void kfd_suspend_all_processes(void); 684void kfd_suspend_all_processes(void);
662int kfd_resume_all_processes(void); 685int kfd_resume_all_processes(void);
663 686
687int kfd_process_device_init_vm(struct kfd_process_device *pdd,
688 struct file *drm_file);
664struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, 689struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
665 struct kfd_process *p); 690 struct kfd_process *p);
666struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, 691struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
@@ -671,6 +696,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
671int kfd_reserved_mem_mmap(struct kfd_process *process, 696int kfd_reserved_mem_mmap(struct kfd_process *process,
672 struct vm_area_struct *vma); 697 struct vm_area_struct *vma);
673 698
699/* KFD process API for creating and translating handles */
700int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
701 void *mem);
702void *kfd_process_device_translate_handle(struct kfd_process_device *p,
703 int handle);
704void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
705 int handle);
706
674/* Process device data iterator */ 707/* Process device data iterator */
675struct kfd_process_device *kfd_get_first_process_device_data( 708struct kfd_process_device *kfd_get_first_process_device_data(
676 struct kfd_process *p); 709 struct kfd_process *p);
@@ -816,6 +849,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
816 849
817void pm_release_ib(struct packet_manager *pm); 850void pm_release_ib(struct packet_manager *pm);
818 851
852uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
853
819uint64_t kfd_get_number_elems(struct kfd_dev *kfd); 854uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
820 855
821/* Events */ 856/* Events */
@@ -837,6 +872,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
837void kfd_signal_hw_exception_event(unsigned int pasid); 872void kfd_signal_hw_exception_event(unsigned int pasid);
838int kfd_set_event(struct kfd_process *p, uint32_t event_id); 873int kfd_set_event(struct kfd_process *p, uint32_t event_id);
839int kfd_reset_event(struct kfd_process *p, uint32_t event_id); 874int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
875int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
876 uint64_t size);
840int kfd_event_create(struct file *devkfd, struct kfd_process *p, 877int kfd_event_create(struct file *devkfd, struct kfd_process *p,
841 uint32_t event_type, bool auto_reset, uint32_t node_id, 878 uint32_t event_type, bool auto_reset, uint32_t node_id,
842 uint32_t *event_id, uint32_t *event_trigger_data, 879 uint32_t *event_id, uint32_t *event_trigger_data,
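
The buffer handles declared in kfd_priv.h above pack the gpu_id into the upper 32 bits and the per-device IDR handle into the lower 32 bits of a single 64-bit value returned to user mode. A tiny standalone check of that encoding, with the macros copied here so it builds outside the kernel tree:

#include <assert.h>
#include <stdint.h>

/* Copies of the kfd_priv.h macros for this userspace check */
#define MAKE_HANDLE(gpu_id, idr_handle) \
        (((uint64_t)(gpu_id) << 32) + (idr_handle))
#define GET_GPU_ID(handle)      ((handle) >> 32)
#define GET_IDR_HANDLE(handle)  ((handle) & 0xFFFFFFFF)

int main(void)
{
        uint32_t gpu_id = 0x3a2b;       /* arbitrary example values */
        int idr_handle = 7;
        uint64_t handle = MAKE_HANDLE(gpu_id, idr_handle);

        assert(GET_GPU_ID(handle) == gpu_id);
        assert(GET_IDR_HANDLE(handle) == (uint32_t)idr_handle);
        return 0;
}
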
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 18b2b86ad503..1711ad0642f7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -30,6 +30,7 @@
30#include <linux/notifier.h> 30#include <linux/notifier.h>
31#include <linux/compat.h> 31#include <linux/compat.h>
32#include <linux/mman.h> 32#include <linux/mman.h>
33#include <linux/file.h>
33 34
34struct mm_struct; 35struct mm_struct;
35 36
@@ -47,22 +48,39 @@ static DEFINE_MUTEX(kfd_processes_mutex);
47 48
48DEFINE_SRCU(kfd_processes_srcu); 49DEFINE_SRCU(kfd_processes_srcu);
49 50
51/* For process termination handling */
50static struct workqueue_struct *kfd_process_wq; 52static struct workqueue_struct *kfd_process_wq;
51 53
54/* Ordered, single-threaded workqueue for restoring evicted
55 * processes. Restoring multiple processes concurrently under memory
56 * pressure can lead to processes blocking each other from validating
57 * their BOs and result in a live-lock situation where processes
58 * remain evicted indefinitely.
59 */
60static struct workqueue_struct *kfd_restore_wq;
61
52static struct kfd_process *find_process(const struct task_struct *thread); 62static struct kfd_process *find_process(const struct task_struct *thread);
53static void kfd_process_ref_release(struct kref *ref); 63static void kfd_process_ref_release(struct kref *ref);
54static struct kfd_process *create_process(const struct task_struct *thread, 64static struct kfd_process *create_process(const struct task_struct *thread,
55 struct file *filep); 65 struct file *filep);
56static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
57 66
58static void evict_process_worker(struct work_struct *work); 67static void evict_process_worker(struct work_struct *work);
59static void restore_process_worker(struct work_struct *work); 68static void restore_process_worker(struct work_struct *work);
60 69
61 70
62void kfd_process_create_wq(void) 71int kfd_process_create_wq(void)
63{ 72{
64 if (!kfd_process_wq) 73 if (!kfd_process_wq)
65 kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0); 74 kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
75 if (!kfd_restore_wq)
76 kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
77
78 if (!kfd_process_wq || !kfd_restore_wq) {
79 kfd_process_destroy_wq();
80 return -ENOMEM;
81 }
82
83 return 0;
66} 84}
67 85
68void kfd_process_destroy_wq(void) 86void kfd_process_destroy_wq(void)
@@ -71,6 +89,116 @@ void kfd_process_destroy_wq(void)
71 destroy_workqueue(kfd_process_wq); 89 destroy_workqueue(kfd_process_wq);
72 kfd_process_wq = NULL; 90 kfd_process_wq = NULL;
73 } 91 }
92 if (kfd_restore_wq) {
93 destroy_workqueue(kfd_restore_wq);
94 kfd_restore_wq = NULL;
95 }
96}
97
98static void kfd_process_free_gpuvm(struct kgd_mem *mem,
99 struct kfd_process_device *pdd)
100{
101 struct kfd_dev *dev = pdd->dev;
102
103 dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd, mem, pdd->vm);
104 dev->kfd2kgd->free_memory_of_gpu(dev->kgd, mem);
105}
106
107/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
108 * This function should be only called right after the process
109 * is created and when kfd_processes_mutex is still being held
110 * to avoid concurrency. Because of that exclusiveness, we do
111 * not need to take p->mutex.
112 */
113static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
114 uint64_t gpu_va, uint32_t size,
115 uint32_t flags, void **kptr)
116{
117 struct kfd_dev *kdev = pdd->dev;
118 struct kgd_mem *mem = NULL;
119 int handle;
120 int err;
121
122 err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
123 pdd->vm, &mem, NULL, flags);
124 if (err)
125 goto err_alloc_mem;
126
127 err = kdev->kfd2kgd->map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
128 if (err)
129 goto err_map_mem;
130
131 err = kdev->kfd2kgd->sync_memory(kdev->kgd, mem, true);
132 if (err) {
133 pr_debug("Sync memory failed, wait interrupted by user signal\n");
134 goto sync_memory_failed;
135 }
136
137 /* Create an obj handle so kfd_process_device_remove_obj_handle
138 * will take care of the bo removal when the process finishes.
139 * We do not need to take p->mutex, because the process is just
140 * created and the ioctls have not had the chance to run.
141 */
142 handle = kfd_process_device_create_obj_handle(pdd, mem);
143
144 if (handle < 0) {
145 err = handle;
146 goto free_gpuvm;
147 }
148
149 if (kptr) {
150 err = kdev->kfd2kgd->map_gtt_bo_to_kernel(kdev->kgd,
151 (struct kgd_mem *)mem, kptr, NULL);
152 if (err) {
153 pr_debug("Map GTT BO to kernel failed\n");
154 goto free_obj_handle;
155 }
156 }
157
158 return err;
159
160free_obj_handle:
161 kfd_process_device_remove_obj_handle(pdd, handle);
162free_gpuvm:
163sync_memory_failed:
164 kfd_process_free_gpuvm(mem, pdd);
165 return err;
166
167err_map_mem:
168 kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem);
169err_alloc_mem:
170 *kptr = NULL;
171 return err;
172}
173
174/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
175 * process for IB usage. The memory reserved is for KFD to submit
176 * IB to AMDGPU from kernel. If the memory is reserved
177 * successfully, ib_kaddr will have the CPU/kernel
178 * address. Check ib_kaddr before accessing the memory.
179 */
180static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
181{
182 struct qcm_process_device *qpd = &pdd->qpd;
183 uint32_t flags = ALLOC_MEM_FLAGS_GTT |
184 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
185 ALLOC_MEM_FLAGS_WRITABLE |
186 ALLOC_MEM_FLAGS_EXECUTABLE;
187 void *kaddr;
188 int ret;
189
190 if (qpd->ib_kaddr || !qpd->ib_base)
191 return 0;
192
193 /* ib_base is only set for dGPU */
194 ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
195 &kaddr);
196 if (ret)
197 return ret;
198
199 qpd->ib_kaddr = kaddr;
200
201 return 0;
74} 202}
75 203
76struct kfd_process *kfd_create_process(struct file *filep) 204struct kfd_process *kfd_create_process(struct file *filep)
@@ -149,6 +277,40 @@ void kfd_unref_process(struct kfd_process *p)
149 kref_put(&p->ref, kfd_process_ref_release); 277 kref_put(&p->ref, kfd_process_ref_release);
150} 278}
151 279
280static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
281{
282 struct kfd_process *p = pdd->process;
283 void *mem;
284 int id;
285
286 /*
287 * Remove all handles from idr and release appropriate
288 * local memory object
289 */
290 idr_for_each_entry(&pdd->alloc_idr, mem, id) {
291 struct kfd_process_device *peer_pdd;
292
293 list_for_each_entry(peer_pdd, &p->per_device_data,
294 per_device_list) {
295 if (!peer_pdd->vm)
296 continue;
297 peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu(
298 peer_pdd->dev->kgd, mem, peer_pdd->vm);
299 }
300
301 pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd, mem);
302 kfd_process_device_remove_obj_handle(pdd, id);
303 }
304}
305
306static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
307{
308 struct kfd_process_device *pdd;
309
310 list_for_each_entry(pdd, &p->per_device_data, per_device_list)
311 kfd_process_device_free_bos(pdd);
312}
313
152static void kfd_process_destroy_pdds(struct kfd_process *p) 314static void kfd_process_destroy_pdds(struct kfd_process *p)
153{ 315{
154 struct kfd_process_device *pdd, *temp; 316 struct kfd_process_device *pdd, *temp;
@@ -158,16 +320,20 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
158 pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n", 320 pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
159 pdd->dev->id, p->pasid); 321 pdd->dev->id, p->pasid);
160 322
161 if (pdd->vm) 323 if (pdd->drm_file)
324 fput(pdd->drm_file);
325 else if (pdd->vm)
162 pdd->dev->kfd2kgd->destroy_process_vm( 326 pdd->dev->kfd2kgd->destroy_process_vm(
163 pdd->dev->kgd, pdd->vm); 327 pdd->dev->kgd, pdd->vm);
164 328
165 list_del(&pdd->per_device_list); 329 list_del(&pdd->per_device_list);
166 330
167 if (pdd->qpd.cwsr_kaddr) 331 if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
168 free_pages((unsigned long)pdd->qpd.cwsr_kaddr, 332 free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
169 get_order(KFD_CWSR_TBA_TMA_SIZE)); 333 get_order(KFD_CWSR_TBA_TMA_SIZE));
170 334
335 idr_destroy(&pdd->alloc_idr);
336
171 kfree(pdd); 337 kfree(pdd);
172 } 338 }
173} 339}
@@ -184,6 +350,8 @@ static void kfd_process_wq_release(struct work_struct *work)
184 350
185 kfd_iommu_unbind_process(p); 351 kfd_iommu_unbind_process(p);
186 352
353 kfd_process_free_outstanding_kfd_bos(p);
354
187 kfd_process_destroy_pdds(p); 355 kfd_process_destroy_pdds(p);
188 dma_fence_put(p->ef); 356 dma_fence_put(p->ef);
189 357
@@ -271,18 +439,18 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
271 .release = kfd_process_notifier_release, 439 .release = kfd_process_notifier_release,
272}; 440};
273 441
274static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) 442static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
275{ 443{
276 unsigned long offset; 444 unsigned long offset;
277 struct kfd_process_device *pdd = NULL; 445 struct kfd_process_device *pdd;
278 struct kfd_dev *dev = NULL;
279 struct qcm_process_device *qpd = NULL;
280 446
281 list_for_each_entry(pdd, &p->per_device_data, per_device_list) { 447 list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
282 dev = pdd->dev; 448 struct kfd_dev *dev = pdd->dev;
283 qpd = &pdd->qpd; 449 struct qcm_process_device *qpd = &pdd->qpd;
284 if (!dev->cwsr_enabled || qpd->cwsr_kaddr) 450
451 if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
285 continue; 452 continue;
453
286 offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT; 454 offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
287 qpd->tba_addr = (int64_t)vm_mmap(filep, 0, 455 qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
288 KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, 456 KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
@@ -307,6 +475,36 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
307 return 0; 475 return 0;
308} 476}
309 477
478static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
479{
480 struct kfd_dev *dev = pdd->dev;
481 struct qcm_process_device *qpd = &pdd->qpd;
482 uint32_t flags = ALLOC_MEM_FLAGS_GTT |
483 ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE;
484 void *kaddr;
485 int ret;
486
487 if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
488 return 0;
489
490 /* cwsr_base is only set for dGPU */
491 ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
492 KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
493 if (ret)
494 return ret;
495
496 qpd->cwsr_kaddr = kaddr;
497 qpd->tba_addr = qpd->cwsr_base;
498
499 memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
500
501 qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
502 pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
503 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
504
505 return 0;
506}
507
310static struct kfd_process *create_process(const struct task_struct *thread, 508static struct kfd_process *create_process(const struct task_struct *thread,
311 struct file *filep) 509 struct file *filep)
312{ 510{
@@ -361,13 +559,14 @@ static struct kfd_process *create_process(const struct task_struct *thread,
361 INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); 559 INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
362 process->last_restore_timestamp = get_jiffies_64(); 560 process->last_restore_timestamp = get_jiffies_64();
363 561
364 err = kfd_process_init_cwsr(process, filep); 562 err = kfd_process_init_cwsr_apu(process, filep);
365 if (err) 563 if (err)
366 goto err_init_cwsr; 564 goto err_init_cwsr;
367 565
368 return process; 566 return process;
369 567
370err_init_cwsr: 568err_init_cwsr:
569 kfd_process_free_outstanding_kfd_bos(process);
371 kfd_process_destroy_pdds(process); 570 kfd_process_destroy_pdds(process);
372err_init_apertures: 571err_init_apertures:
373 pqm_uninit(&process->pqm); 572 pqm_uninit(&process->pqm);
@@ -418,18 +617,70 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
418 pdd->already_dequeued = false; 617 pdd->already_dequeued = false;
419 list_add(&pdd->per_device_list, &p->per_device_data); 618 list_add(&pdd->per_device_list, &p->per_device_data);
420 619
421 /* Create the GPUVM context for this specific device */ 620 /* Init idr used for memory handle translation */
422 if (dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm, 621 idr_init(&pdd->alloc_idr);
423 &p->kgd_process_info, &p->ef)) { 622
623 return pdd;
624}
625
626/**
627 * kfd_process_device_init_vm - Initialize a VM for a process-device
628 *
629 * @pdd: The process-device
630 * @drm_file: Optional pointer to a DRM file descriptor
631 *
632 * If @drm_file is specified, it will be used to acquire the VM from
633 * that file descriptor. If successful, the @pdd takes ownership of
634 * the file descriptor.
635 *
636 * If @drm_file is NULL, a new VM is created.
637 *
638 * Returns 0 on success, -errno on failure.
639 */
640int kfd_process_device_init_vm(struct kfd_process_device *pdd,
641 struct file *drm_file)
642{
643 struct kfd_process *p;
644 struct kfd_dev *dev;
645 int ret;
646
647 if (pdd->vm)
648 return drm_file ? -EBUSY : 0;
649
650 p = pdd->process;
651 dev = pdd->dev;
652
653 if (drm_file)
654 ret = dev->kfd2kgd->acquire_process_vm(
655 dev->kgd, drm_file,
656 &pdd->vm, &p->kgd_process_info, &p->ef);
657 else
658 ret = dev->kfd2kgd->create_process_vm(
659 dev->kgd, &pdd->vm, &p->kgd_process_info, &p->ef);
660 if (ret) {
424 pr_err("Failed to create process VM object\n"); 661 pr_err("Failed to create process VM object\n");
425 goto err_create_pdd; 662 return ret;
426 } 663 }
427 return pdd;
428 664
429err_create_pdd: 665 ret = kfd_process_device_reserve_ib_mem(pdd);
430 list_del(&pdd->per_device_list); 666 if (ret)
431 kfree(pdd); 667 goto err_reserve_ib_mem;
432 return NULL; 668 ret = kfd_process_device_init_cwsr_dgpu(pdd);
669 if (ret)
670 goto err_init_cwsr;
671
672 pdd->drm_file = drm_file;
673
674 return 0;
675
676err_init_cwsr:
677err_reserve_ib_mem:
678 kfd_process_device_free_bos(pdd);
679 if (!drm_file)
680 dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);
681 pdd->vm = NULL;
682
683 return ret;
433} 684}
434 685
435/* 686/*
@@ -455,6 +706,10 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
455 if (err) 706 if (err)
456 return ERR_PTR(err); 707 return ERR_PTR(err);
457 708
709 err = kfd_process_device_init_vm(pdd, NULL);
710 if (err)
711 return ERR_PTR(err);
712
458 return pdd; 713 return pdd;
459} 714}
460 715
@@ -480,6 +735,37 @@ bool kfd_has_process_device_data(struct kfd_process *p)
480 return !(list_empty(&p->per_device_data)); 735 return !(list_empty(&p->per_device_data));
481} 736}
482 737
738/* Create specific handle mapped to mem from process local memory idr
739 * Assumes that the process lock is held.
740 */
741int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
742 void *mem)
743{
744 return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
745}
746
747/* Translate specific handle from process local memory idr
748 * Assumes that the process lock is held.
749 */
750void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
751 int handle)
752{
753 if (handle < 0)
754 return NULL;
755
756 return idr_find(&pdd->alloc_idr, handle);
757}
758
759/* Remove specific handle from process local memory idr
760 * Assumes that the process lock is held.
761 */
762void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
763 int handle)
764{
765 if (handle >= 0)
766 idr_remove(&pdd->alloc_idr, handle);
767}
768
483/* This increments the process->ref counter. */ 769/* This increments the process->ref counter. */
484struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) 770struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
485{ 771{
@@ -605,7 +891,7 @@ static void evict_process_worker(struct work_struct *work)
605 dma_fence_signal(p->ef); 891 dma_fence_signal(p->ef);
606 dma_fence_put(p->ef); 892 dma_fence_put(p->ef);
607 p->ef = NULL; 893 p->ef = NULL;
608 schedule_delayed_work(&p->restore_work, 894 queue_delayed_work(kfd_restore_wq, &p->restore_work,
609 msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); 895 msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
610 896
611 pr_debug("Finished evicting pasid %d\n", p->pasid); 897 pr_debug("Finished evicting pasid %d\n", p->pasid);
@@ -654,7 +940,7 @@ static void restore_process_worker(struct work_struct *work)
654 if (ret) { 940 if (ret) {
655 pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n", 941 pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
656 p->pasid, PROCESS_BACK_OFF_TIME_MS); 942 p->pasid, PROCESS_BACK_OFF_TIME_MS);
657 ret = schedule_delayed_work(&p->restore_work, 943 ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
658 msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS)); 944 msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
659 WARN(!ret, "reschedule restore work failed\n"); 945 WARN(!ret, "reschedule restore work failed\n");
660 return; 946 return;
@@ -693,7 +979,7 @@ int kfd_resume_all_processes(void)
693 int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu); 979 int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
694 980
695 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { 981 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
696 if (!schedule_delayed_work(&p->restore_work, 0)) { 982 if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
697 pr_err("Restore process %d failed during resume\n", 983 pr_err("Restore process %d failed during resume\n",
698 p->pasid); 984 p->pasid);
699 ret = -EFAULT; 985 ret = -EFAULT;
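
kfd_process.c above gives each process-device an alloc_idr plus create/translate/remove helpers; kfd_process_alloc_gpuvm() registers every kernel-reserved buffer there so that kfd_process_device_free_bos() can unmap and free whatever remains at process teardown. Below is a kernel-style sketch of that lifecycle for a hypothetical allocation path; the function itself is illustrative and not part of this series, but all helpers it uses are declared in the kfd_priv.h hunk earlier in this diff.

/* Illustrative sketch (assumes kfd_priv.h context; not compilable
 * standalone): register a buffer, hand a 64-bit handle to user mode,
 * translate it back later, and drop it on explicit free.
 */
static int example_obj_handle_lifecycle(struct kfd_process_device *pdd,
                                        void *mem, uint64_t *user_handle)
{
        int idr_handle;
        void *found;

        /* Track the buffer so process teardown can release it */
        idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
        if (idr_handle < 0)
                return idr_handle;

        /* Combine device id and per-device handle into the uapi handle */
        *user_handle = MAKE_HANDLE(pdd->dev->id, idr_handle);

        /* A later ioctl would translate the handle back to the buffer */
        found = kfd_process_device_translate_handle(pdd,
                        GET_IDR_HANDLE(*user_handle));
        if (WARN_ON(found != mem))
                return -EINVAL;

        /* On explicit free, drop the tracking entry as well */
        kfd_process_device_remove_obj_handle(pdd, idr_handle);
        return 0;
}
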
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 250615535563..ac28abc94e57 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -441,6 +441,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
441 dev->node_props.device_id); 441 dev->node_props.device_id);
442 sysfs_show_32bit_prop(buffer, "location_id", 442 sysfs_show_32bit_prop(buffer, "location_id",
443 dev->node_props.location_id); 443 dev->node_props.location_id);
444 sysfs_show_32bit_prop(buffer, "drm_render_minor",
445 dev->node_props.drm_render_minor);
444 446
445 if (dev->gpu) { 447 if (dev->gpu) {
446 log_max_watch_addr = 448 log_max_watch_addr =
@@ -1214,6 +1216,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
1214 dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd); 1216 dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd);
1215 dev->node_props.max_engine_clk_ccompute = 1217 dev->node_props.max_engine_clk_ccompute =
1216 cpufreq_quick_get_max(0) / 1000; 1218 cpufreq_quick_get_max(0) / 1000;
1219 dev->node_props.drm_render_minor =
1220 gpu->shared_resources.drm_render_minor;
1217 1221
1218 kfd_fill_mem_clk_max_info(dev); 1222 kfd_fill_mem_clk_max_info(dev);
1219 kfd_fill_iolink_non_crat_info(dev); 1223 kfd_fill_iolink_non_crat_info(dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index c0be2be6dca5..eb54cfcaf039 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -71,6 +71,7 @@ struct kfd_node_properties {
71 uint32_t location_id; 71 uint32_t location_id;
72 uint32_t max_engine_clk_fcompute; 72 uint32_t max_engine_clk_fcompute;
73 uint32_t max_engine_clk_ccompute; 73 uint32_t max_engine_clk_ccompute;
74 int32_t drm_render_minor;
74 uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; 75 uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
75}; 76};
76 77
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 1e5c22ceb256..237289a72bb7 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -130,6 +130,7 @@ struct tile_config {
130 130
131/* 131/*
132 * Allocation flag domains 132 * Allocation flag domains
133 * NOTE: This must match the corresponding definitions in kfd_ioctl.h.
133 */ 134 */
134#define ALLOC_MEM_FLAGS_VRAM (1 << 0) 135#define ALLOC_MEM_FLAGS_VRAM (1 << 0)
135#define ALLOC_MEM_FLAGS_GTT (1 << 1) 136#define ALLOC_MEM_FLAGS_GTT (1 << 1)
@@ -138,6 +139,7 @@ struct tile_config {
138 139
139/* 140/*
140 * Allocation flags attributes/access options. 141 * Allocation flags attributes/access options.
142 * NOTE: This must match the corresponding definitions in kfd_ioctl.h.
141 */ 143 */
142#define ALLOC_MEM_FLAGS_WRITABLE (1 << 31) 144#define ALLOC_MEM_FLAGS_WRITABLE (1 << 31)
143#define ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) 145#define ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30)
@@ -336,6 +338,8 @@ struct kfd2kgd_calls {
336 338
337 int (*create_process_vm)(struct kgd_dev *kgd, void **vm, 339 int (*create_process_vm)(struct kgd_dev *kgd, void **vm,
338 void **process_info, struct dma_fence **ef); 340 void **process_info, struct dma_fence **ef);
341 int (*acquire_process_vm)(struct kgd_dev *kgd, struct file *filp,
342 void **vm, void **process_info, struct dma_fence **ef);
339 void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm); 343 void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm);
340 uint32_t (*get_process_page_dir)(void *vm); 344 uint32_t (*get_process_page_dir)(void *vm);
341 void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, 345 void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 111d73ba2d96..b4f5073dbac2 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -107,8 +107,6 @@ struct kfd_ioctl_get_clock_counters_args {
107 __u32 pad; 107 __u32 pad;
108}; 108};
109 109
110#define NUM_OF_SUPPORTED_GPUS 7
111
112struct kfd_process_device_apertures { 110struct kfd_process_device_apertures {
113 __u64 lds_base; /* from KFD */ 111 __u64 lds_base; /* from KFD */
114 __u64 lds_limit; /* from KFD */ 112 __u64 lds_limit; /* from KFD */
@@ -120,6 +118,12 @@ struct kfd_process_device_apertures {
120 __u32 pad; 118 __u32 pad;
121}; 119};
122 120
121/*
122 * AMDKFD_IOC_GET_PROCESS_APERTURES is deprecated. Use
123 * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW instead, which supports an
124 * unlimited number of GPUs.
125 */
126#define NUM_OF_SUPPORTED_GPUS 7
123struct kfd_ioctl_get_process_apertures_args { 127struct kfd_ioctl_get_process_apertures_args {
124 struct kfd_process_device_apertures 128 struct kfd_process_device_apertures
125 process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */ 129 process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
@@ -129,6 +133,19 @@ struct kfd_ioctl_get_process_apertures_args {
129 __u32 pad; 133 __u32 pad;
130}; 134};
131 135
136struct kfd_ioctl_get_process_apertures_new_args {
137 /* User allocated. Pointer to struct kfd_process_device_apertures
138 * filled in by Kernel
139 */
140 __u64 kfd_process_device_apertures_ptr;
141 /* to KFD - indicates amount of memory present in
142 * kfd_process_device_apertures_ptr
143 * from KFD - Number of entries filled by KFD.
144 */
145 __u32 num_of_nodes;
146 __u32 pad;
147};
148
132#define MAX_ALLOWED_NUM_POINTS 100 149#define MAX_ALLOWED_NUM_POINTS 100
133#define MAX_ALLOWED_AW_BUFF_SIZE 4096 150#define MAX_ALLOWED_AW_BUFF_SIZE 4096
134#define MAX_ALLOWED_WAC_BUFF_SIZE 128 151#define MAX_ALLOWED_WAC_BUFF_SIZE 128
@@ -269,6 +286,86 @@ struct kfd_ioctl_set_trap_handler_args {
269 __u32 pad; 286 __u32 pad;
270}; 287};
271 288
289struct kfd_ioctl_acquire_vm_args {
290 __u32 drm_fd; /* to KFD */
291 __u32 gpu_id; /* to KFD */
292};
293
294/* Allocation flags: memory types */
295#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0)
296#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1)
297#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2)
298#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3)
299/* Allocation flags: attributes/access options */
300#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31)
301#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30)
302#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29)
303#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28)
304#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27)
305#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26)
306
307/* Allocate memory for later SVM (shared virtual memory) mapping.
308 *
309 * @va_addr: virtual address of the memory to be allocated
310 * all later mappings on all GPUs will use this address
311 * @size: size in bytes
312 * @handle: buffer handle returned to user mode, used to refer to
313 * this allocation for mapping, unmapping and freeing
314 * @mmap_offset: for CPU-mapping the allocation by mmapping a render node
315 * for userptrs this is overloaded to specify the CPU address
316 * @gpu_id: device identifier
317 * @flags: memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above
318 */
319struct kfd_ioctl_alloc_memory_of_gpu_args {
320 __u64 va_addr; /* to KFD */
321 __u64 size; /* to KFD */
322 __u64 handle; /* from KFD */
323 __u64 mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */
324 __u32 gpu_id; /* to KFD */
325 __u32 flags;
326};
327
328/* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu
329 *
330 * @handle: memory handle returned by alloc
331 */
332struct kfd_ioctl_free_memory_of_gpu_args {
333 __u64 handle; /* to KFD */
334};
335
336/* Map memory to one or more GPUs
337 *
338 * @handle: memory handle returned by alloc
339 * @device_ids_array_ptr: array of gpu_ids (__u32 per device)
340 * @n_devices: number of devices in the array
341 * @n_success: number of devices mapped successfully
342 *
343 * @n_success returns information to the caller how many devices from
344 * the start of the array have mapped the buffer successfully. It can
345 * be passed into a subsequent retry call to skip those devices. For
346 * the first call the caller should initialize it to 0.
347 *
348 * If the ioctl completes with return code 0 (success), n_success ==
349 * n_devices.
350 */
351struct kfd_ioctl_map_memory_to_gpu_args {
352 __u64 handle; /* to KFD */
353 __u64 device_ids_array_ptr; /* to KFD */
354 __u32 n_devices; /* to KFD */
355 __u32 n_success; /* to/from KFD */
356};
357
358/* Unmap memory from one or more GPUs
359 *
360 * same arguments as for mapping
361 */
362struct kfd_ioctl_unmap_memory_from_gpu_args {
363 __u64 handle; /* to KFD */
364 __u64 device_ids_array_ptr; /* to KFD */
365 __u32 n_devices; /* to KFD */
366 __u32 n_success; /* to/from KFD */
367};
368
272#define AMDKFD_IOCTL_BASE 'K' 369#define AMDKFD_IOCTL_BASE 'K'
273#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) 370#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
274#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) 371#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -332,7 +429,26 @@ struct kfd_ioctl_set_trap_handler_args {
332#define AMDKFD_IOC_SET_TRAP_HANDLER \ 429#define AMDKFD_IOC_SET_TRAP_HANDLER \
333 AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args) 430 AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
334 431
432#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \
433 AMDKFD_IOWR(0x14, \
434 struct kfd_ioctl_get_process_apertures_new_args)
435
436#define AMDKFD_IOC_ACQUIRE_VM \
437 AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args)
438
439#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \
440 AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args)
441
442#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \
443 AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args)
444
445#define AMDKFD_IOC_MAP_MEMORY_TO_GPU \
446 AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args)
447
448#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \
449 AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)
450
335#define AMDKFD_COMMAND_START 0x01 451#define AMDKFD_COMMAND_START 0x01
336#define AMDKFD_COMMAND_END 0x14 452#define AMDKFD_COMMAND_END 0x1A
337 453
338#endif 454#endif
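
Taken together, the new uapi above lets a dGPU process manage GPUVM memory explicitly: acquire the VM from an already-open render node, allocate a buffer at a chosen SVM address, then map it on one or more GPUs. A hedged userspace sketch of that sequence follows; the gpu_id, drm_fd, virtual address and flag combination are placeholders chosen for illustration, with gpu_id and drm_fd assumed to come from the KFD topology and the render-node sketch above.

#include <stdint.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

/* Sketch: acquire the GPUVM for one GPU, allocate 2 MiB of VRAM at an
 * arbitrary SVM address above the 16 MiB kernel-reserved area, and map
 * it on that GPU.
 */
int gpuvm_alloc_and_map(int kfd_fd, int drm_fd, uint32_t gpu_id)
{
        struct kfd_ioctl_acquire_vm_args acquire = {
                .drm_fd = (uint32_t)drm_fd,
                .gpu_id = gpu_id,
        };
        if (ioctl(kfd_fd, AMDKFD_IOC_ACQUIRE_VM, &acquire))
                return -1;

        struct kfd_ioctl_alloc_memory_of_gpu_args alloc = {
                .va_addr = 0x200000000ULL,      /* arbitrary example VA */
                .size = 2 * 1024 * 1024,
                .gpu_id = gpu_id,
                .flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM |
                         KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE,
        };
        if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &alloc))
                return -1;

        uint32_t gpu_ids[1] = { gpu_id };
        struct kfd_ioctl_map_memory_to_gpu_args map = {
                .handle = alloc.handle,
                .device_ids_array_ptr = (uint64_t)(uintptr_t)gpu_ids,
                .n_devices = 1,
                .n_success = 0,         /* must start at 0 */
        };
        if (ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &map))
                return -1;

        printf("mapped handle 0x%llx on %u GPU(s)\n",
               (unsigned long long)alloc.handle, map.n_success);
        return 0;
}

int main(void)
{
        int kfd_fd = open("/dev/kfd", O_RDWR);
        if (kfd_fd < 0) {
                perror("open /dev/kfd");
                return 1;
        }
        /* gpu_id/drm_fd discovery omitted; see the render-node sketch
         * above before calling gpuvm_alloc_and_map(kfd_fd, drm_fd, gpu_id).
         */
        close(kfd_fd);
        return 0;
}

As the header documents, n_success starts at 0 and, when the ioctl returns 0, comes back equal to n_devices; on a partial failure it tells a retry loop how many leading devices already have the buffer mapped.
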