author     Felix Kuehling <Felix.Kuehling@amd.com>   2018-03-23 15:32:33 -0400
committer  Oded Gabbay <oded.gabbay@gmail.com>       2018-03-23 15:32:33 -0400
commit     5ae0283e831a94c714fce61063e4724baf364ef3
tree       b21af351d9e41294d05804d2f4e2848945701ece /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
parent     6b95e7973a136181e37446bd29b0b2e2f0d2d653
drm/amdgpu: Add userptr support for KFD
This adds support for allocating, mapping, unmapping and freeing
userptr BOs, and for handling MMU notifiers.
v2: updated a comment
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
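
For orientation, a rough user-space view of what this patch enables: a process hands an ordinary malloc'd buffer to KFD as a userptr BO and maps it for GPU access, while the MMU-notifier eviction and delayed restore added below keep the mapping consistent when the kernel moves the pages. The following is a minimal sketch against the ROCm Thunk (libhsakmt), assuming the hsaKmtOpenKFD()/hsaKmtRegisterMemory()/hsaKmtMapMemoryToGPU() family of calls; exact prototypes vary between Thunk releases and are not part of this patch.

```c
/*
 * Illustrative sketch only -- not part of this patch. Prototypes are
 * assumed from the ROCT-Thunk-Interface (libhsakmt) and may differ
 * slightly between releases.
 */
#include <stdio.h>
#include <stdlib.h>
#include "hsakmt.h"

int userptr_demo(size_t size)
{
	HSAuint64 gpu_va = 0;
	void *buf;
	int ret = -1;

	if (hsaKmtOpenKFD() != HSAKMT_STATUS_SUCCESS)
		return -1;

	buf = malloc(size);		/* ordinary pageable memory */
	if (!buf)
		goto out_close;

	/* Register the malloc'd range with KFD as a userptr BO */
	if (hsaKmtRegisterMemory(buf, size) != HSAKMT_STATUS_SUCCESS)
		goto out_free;

	/* Map it into the GPU virtual address space */
	if (hsaKmtMapMemoryToGPU(buf, size, &gpu_va) != HSAKMT_STATUS_SUCCESS)
		goto out_deregister;

	printf("userptr %p mapped at GPU VA 0x%llx\n",
	       buf, (unsigned long long)gpu_va);
	ret = 0;

	hsaKmtUnmapMemoryToGPU(buf);
out_deregister:
	hsaKmtDeregisterMemory(buf);
out_free:
	free(buf);
out_close:
	hsaKmtCloseKFD();
	return ret;
}
```

On the kernel side, the register step corresponds roughly to the new init_user_pages() path and the map step to amdgpu_amdkfd_gpuvm_map_memory_to_gpu() in the diff below.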
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 567
1 file changed, 543 insertions, 24 deletions
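
The diff also adds a second accounting limit: besides the existing kernel (GTT) memory cap of 3/8 of system RAM, userptr allocations are capped at 3/4 of system RAM. Both limits are computed with shift arithmetic in amdgpu_amdkfd_gpuvm_init_mem_limits(); below is a standalone sketch (not driver code) showing that the expressions work out to those fractions.

```c
/*
 * Sketch of the limit arithmetic used in
 * amdgpu_amdkfd_gpuvm_init_mem_limits() below:
 * (mem >> 1) - (mem >> 3) is 1/2 - 1/8 = 3/8 of system RAM,
 * and mem - (mem >> 2) is 1 - 1/4 = 3/4 of system RAM.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mem = 16ULL << 30;			   /* e.g. 16 GiB of RAM */
	uint64_t system_limit  = (mem >> 1) - (mem >> 3);  /* 6 GiB  = 3/8 */
	uint64_t userptr_limit = mem - (mem >> 2);	   /* 12 GiB = 3/4 */

	printf("kernel memory limit %lluM, userptr limit %lluM\n",
	       (unsigned long long)(system_limit >> 20),
	       (unsigned long long)(userptr_limit >> 20));
	return 0;
}
```

With 16 GiB of RAM this prints a 6144M kernel limit and a 12288M userptr limit, matching the pr_debug output added in the patch.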
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 2463ff6ac9ca..5296e24fd662 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -23,6 +23,7 @@ | |||
23 | #define pr_fmt(fmt) "kfd2kgd: " fmt | 23 | #define pr_fmt(fmt) "kfd2kgd: " fmt |
24 | 24 | ||
25 | #include <linux/list.h> | 25 | #include <linux/list.h> |
26 | #include <linux/sched/mm.h> | ||
26 | #include <drm/drmP.h> | 27 | #include <drm/drmP.h> |
27 | #include "amdgpu_object.h" | 28 | #include "amdgpu_object.h" |
28 | #include "amdgpu_vm.h" | 29 | #include "amdgpu_vm.h" |
@@ -33,10 +34,20 @@ | |||
33 | */ | 34 | */ |
34 | #define VI_BO_SIZE_ALIGN (0x8000) | 35 | #define VI_BO_SIZE_ALIGN (0x8000) |
35 | 36 | ||
37 | /* BO flag to indicate a KFD userptr BO */ | ||
38 | #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) | ||
39 | |||
40 | /* Userptr restore delay, just long enough to allow consecutive VM | ||
41 | * changes to accumulate | ||
42 | */ | ||
43 | #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 | ||
44 | |||
36 | /* Impose limit on how much memory KFD can use */ | 45 | /* Impose limit on how much memory KFD can use */ |
37 | static struct { | 46 | static struct { |
38 | uint64_t max_system_mem_limit; | 47 | uint64_t max_system_mem_limit; |
48 | uint64_t max_userptr_mem_limit; | ||
39 | int64_t system_mem_used; | 49 | int64_t system_mem_used; |
50 | int64_t userptr_mem_used; | ||
40 | spinlock_t mem_limit_lock; | 51 | spinlock_t mem_limit_lock; |
41 | } kfd_mem_limit; | 52 | } kfd_mem_limit; |
42 | 53 | ||
@@ -57,6 +68,7 @@ static const char * const domain_bit_to_string[] = { | |||
57 | 68 | ||
58 | #define domain_string(domain) domain_bit_to_string[ffs(domain)-1] | 69 | #define domain_string(domain) domain_bit_to_string[ffs(domain)-1] |
59 | 70 | ||
71 | static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work); | ||
60 | 72 | ||
61 | 73 | ||
62 | static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) | 74 | static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) |
@@ -78,6 +90,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, | |||
78 | 90 | ||
79 | /* Set memory usage limits. Current, limits are | 91 | /* Set memory usage limits. Current, limits are |
80 | * System (kernel) memory - 3/8th System RAM | 92 | * System (kernel) memory - 3/8th System RAM |
93 | * Userptr memory - 3/4th System RAM | ||
81 | */ | 94 | */ |
82 | void amdgpu_amdkfd_gpuvm_init_mem_limits(void) | 95 | void amdgpu_amdkfd_gpuvm_init_mem_limits(void) |
83 | { | 96 | { |
@@ -90,8 +103,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) | |||
90 | 103 | ||
91 | spin_lock_init(&kfd_mem_limit.mem_limit_lock); | 104 | spin_lock_init(&kfd_mem_limit.mem_limit_lock); |
92 | kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); | 105 | kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); |
93 | pr_debug("Kernel memory limit %lluM\n", | 106 | kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2); |
94 | (kfd_mem_limit.max_system_mem_limit >> 20)); | 107 | pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n", |
108 | (kfd_mem_limit.max_system_mem_limit >> 20), | ||
109 | (kfd_mem_limit.max_userptr_mem_limit >> 20)); | ||
95 | } | 110 | } |
96 | 111 | ||
97 | static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, | 112 | static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, |
@@ -111,6 +126,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, | |||
111 | goto err_no_mem; | 126 | goto err_no_mem; |
112 | } | 127 | } |
113 | kfd_mem_limit.system_mem_used += (acc_size + size); | 128 | kfd_mem_limit.system_mem_used += (acc_size + size); |
129 | } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { | ||
130 | if ((kfd_mem_limit.system_mem_used + acc_size > | ||
131 | kfd_mem_limit.max_system_mem_limit) || | ||
132 | (kfd_mem_limit.userptr_mem_used + (size + acc_size) > | ||
133 | kfd_mem_limit.max_userptr_mem_limit)) { | ||
134 | ret = -ENOMEM; | ||
135 | goto err_no_mem; | ||
136 | } | ||
137 | kfd_mem_limit.system_mem_used += acc_size; | ||
138 | kfd_mem_limit.userptr_mem_used += size; | ||
114 | } | 139 | } |
115 | err_no_mem: | 140 | err_no_mem: |
116 | spin_unlock(&kfd_mem_limit.mem_limit_lock); | 141 | spin_unlock(&kfd_mem_limit.mem_limit_lock); |
@@ -126,10 +151,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, | |||
126 | sizeof(struct amdgpu_bo)); | 151 | sizeof(struct amdgpu_bo)); |
127 | 152 | ||
128 | spin_lock(&kfd_mem_limit.mem_limit_lock); | 153 | spin_lock(&kfd_mem_limit.mem_limit_lock); |
129 | if (domain == AMDGPU_GEM_DOMAIN_GTT) | 154 | if (domain == AMDGPU_GEM_DOMAIN_GTT) { |
130 | kfd_mem_limit.system_mem_used -= (acc_size + size); | 155 | kfd_mem_limit.system_mem_used -= (acc_size + size); |
156 | } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { | ||
157 | kfd_mem_limit.system_mem_used -= acc_size; | ||
158 | kfd_mem_limit.userptr_mem_used -= size; | ||
159 | } | ||
131 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, | 160 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, |
132 | "kfd system memory accounting unbalanced"); | 161 | "kfd system memory accounting unbalanced"); |
162 | WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, | ||
163 | "kfd userptr memory accounting unbalanced"); | ||
133 | 164 | ||
134 | spin_unlock(&kfd_mem_limit.mem_limit_lock); | 165 | spin_unlock(&kfd_mem_limit.mem_limit_lock); |
135 | } | 166 | } |
@@ -138,12 +169,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) | |||
138 | { | 169 | { |
139 | spin_lock(&kfd_mem_limit.mem_limit_lock); | 170 | spin_lock(&kfd_mem_limit.mem_limit_lock); |
140 | 171 | ||
141 | if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { | 172 | if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { |
173 | kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; | ||
174 | kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); | ||
175 | } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { | ||
142 | kfd_mem_limit.system_mem_used -= | 176 | kfd_mem_limit.system_mem_used -= |
143 | (bo->tbo.acc_size + amdgpu_bo_size(bo)); | 177 | (bo->tbo.acc_size + amdgpu_bo_size(bo)); |
144 | } | 178 | } |
145 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, | 179 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, |
146 | "kfd system memory accounting unbalanced"); | 180 | "kfd system memory accounting unbalanced"); |
181 | WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, | ||
182 | "kfd userptr memory accounting unbalanced"); | ||
147 | 183 | ||
148 | spin_unlock(&kfd_mem_limit.mem_limit_lock); | 184 | spin_unlock(&kfd_mem_limit.mem_limit_lock); |
149 | } | 185 | } |
@@ -506,7 +542,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev, | |||
506 | } | 542 | } |
507 | 543 | ||
508 | static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, | 544 | static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, |
509 | struct amdkfd_process_info *process_info) | 545 | struct amdkfd_process_info *process_info, |
546 | bool userptr) | ||
510 | { | 547 | { |
511 | struct ttm_validate_buffer *entry = &mem->validate_list; | 548 | struct ttm_validate_buffer *entry = &mem->validate_list; |
512 | struct amdgpu_bo *bo = mem->bo; | 549 | struct amdgpu_bo *bo = mem->bo; |
@@ -515,8 +552,93 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, | |||
515 | entry->shared = true; | 552 | entry->shared = true; |
516 | entry->bo = &bo->tbo; | 553 | entry->bo = &bo->tbo; |
517 | mutex_lock(&process_info->lock); | 554 | mutex_lock(&process_info->lock); |
518 | list_add_tail(&entry->head, &process_info->kfd_bo_list); | 555 | if (userptr) |
556 | list_add_tail(&entry->head, &process_info->userptr_valid_list); | ||
557 | else | ||
558 | list_add_tail(&entry->head, &process_info->kfd_bo_list); | ||
559 | mutex_unlock(&process_info->lock); | ||
560 | } | ||
561 | |||
562 | /* Initializes user pages. It registers the MMU notifier and validates | ||
563 | * the userptr BO in the GTT domain. | ||
564 | * | ||
565 | * The BO must already be on the userptr_valid_list. Otherwise an | ||
566 | * eviction and restore may happen that leaves the new BO unmapped | ||
567 | * with the user mode queues running. | ||
568 | * | ||
569 | * Takes the process_info->lock to protect against concurrent restore | ||
570 | * workers. | ||
571 | * | ||
572 | * Returns 0 for success, negative errno for errors. | ||
573 | */ | ||
574 | static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, | ||
575 | uint64_t user_addr) | ||
576 | { | ||
577 | struct amdkfd_process_info *process_info = mem->process_info; | ||
578 | struct amdgpu_bo *bo = mem->bo; | ||
579 | struct ttm_operation_ctx ctx = { true, false }; | ||
580 | int ret = 0; | ||
581 | |||
582 | mutex_lock(&process_info->lock); | ||
583 | |||
584 | ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0); | ||
585 | if (ret) { | ||
586 | pr_err("%s: Failed to set userptr: %d\n", __func__, ret); | ||
587 | goto out; | ||
588 | } | ||
589 | |||
590 | ret = amdgpu_mn_register(bo, user_addr); | ||
591 | if (ret) { | ||
592 | pr_err("%s: Failed to register MMU notifier: %d\n", | ||
593 | __func__, ret); | ||
594 | goto out; | ||
595 | } | ||
596 | |||
597 | /* If no restore worker is running concurrently, user_pages | ||
598 | * should not be allocated | ||
599 | */ | ||
600 | WARN(mem->user_pages, "Leaking user_pages array"); | ||
601 | |||
602 | mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, | ||
603 | sizeof(struct page *), | ||
604 | GFP_KERNEL | __GFP_ZERO); | ||
605 | if (!mem->user_pages) { | ||
606 | pr_err("%s: Failed to allocate pages array\n", __func__); | ||
607 | ret = -ENOMEM; | ||
608 | goto unregister_out; | ||
609 | } | ||
610 | |||
611 | ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); | ||
612 | if (ret) { | ||
613 | pr_err("%s: Failed to get user pages: %d\n", __func__, ret); | ||
614 | goto free_out; | ||
615 | } | ||
616 | |||
617 | amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); | ||
618 | |||
619 | ret = amdgpu_bo_reserve(bo, true); | ||
620 | if (ret) { | ||
621 | pr_err("%s: Failed to reserve BO\n", __func__); | ||
622 | goto release_out; | ||
623 | } | ||
624 | amdgpu_ttm_placement_from_domain(bo, mem->domain); | ||
625 | ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); | ||
626 | if (ret) | ||
627 | pr_err("%s: failed to validate BO\n", __func__); | ||
628 | amdgpu_bo_unreserve(bo); | ||
629 | |||
630 | release_out: | ||
631 | if (ret) | ||
632 | release_pages(mem->user_pages, bo->tbo.ttm->num_pages); | ||
633 | free_out: | ||
634 | kvfree(mem->user_pages); | ||
635 | mem->user_pages = NULL; | ||
636 | unregister_out: | ||
637 | if (ret) | ||
638 | amdgpu_mn_unregister(bo); | ||
639 | out: | ||
519 | mutex_unlock(&process_info->lock); | 640 | mutex_unlock(&process_info->lock); |
641 | return ret; | ||
520 | } | 642 | } |
521 | 643 | ||
522 | /* Reserving a BO and its page table BOs must happen atomically to | 644 | /* Reserving a BO and its page table BOs must happen atomically to |
@@ -748,7 +870,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, | |||
748 | } | 870 | } |
749 | 871 | ||
750 | static int map_bo_to_gpuvm(struct amdgpu_device *adev, | 872 | static int map_bo_to_gpuvm(struct amdgpu_device *adev, |
751 | struct kfd_bo_va_list *entry, struct amdgpu_sync *sync) | 873 | struct kfd_bo_va_list *entry, struct amdgpu_sync *sync, |
874 | bool no_update_pte) | ||
752 | { | 875 | { |
753 | int ret; | 876 | int ret; |
754 | 877 | ||
@@ -762,6 +885,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, | |||
762 | return ret; | 885 | return ret; |
763 | } | 886 | } |
764 | 887 | ||
888 | if (no_update_pte) | ||
889 | return 0; | ||
890 | |||
765 | ret = update_gpuvm_pte(adev, entry, sync); | 891 | ret = update_gpuvm_pte(adev, entry, sync); |
766 | if (ret) { | 892 | if (ret) { |
767 | pr_err("update_gpuvm_pte() failed\n"); | 893 | pr_err("update_gpuvm_pte() failed\n"); |
@@ -820,6 +946,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, | |||
820 | mutex_init(&info->lock); | 946 | mutex_init(&info->lock); |
821 | INIT_LIST_HEAD(&info->vm_list_head); | 947 | INIT_LIST_HEAD(&info->vm_list_head); |
822 | INIT_LIST_HEAD(&info->kfd_bo_list); | 948 | INIT_LIST_HEAD(&info->kfd_bo_list); |
949 | INIT_LIST_HEAD(&info->userptr_valid_list); | ||
950 | INIT_LIST_HEAD(&info->userptr_inval_list); | ||
823 | 951 | ||
824 | info->eviction_fence = | 952 | info->eviction_fence = |
825 | amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), | 953 | amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), |
@@ -830,6 +958,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, | |||
830 | goto create_evict_fence_fail; | 958 | goto create_evict_fence_fail; |
831 | } | 959 | } |
832 | 960 | ||
961 | info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); | ||
962 | atomic_set(&info->evicted_bos, 0); | ||
963 | INIT_DELAYED_WORK(&info->restore_userptr_work, | ||
964 | amdgpu_amdkfd_restore_userptr_worker); | ||
965 | |||
833 | *process_info = info; | 966 | *process_info = info; |
834 | *ef = dma_fence_get(&info->eviction_fence->base); | 967 | *ef = dma_fence_get(&info->eviction_fence->base); |
835 | } | 968 | } |
@@ -872,6 +1005,7 @@ reserve_pd_fail: | |||
872 | dma_fence_put(*ef); | 1005 | dma_fence_put(*ef); |
873 | *ef = NULL; | 1006 | *ef = NULL; |
874 | *process_info = NULL; | 1007 | *process_info = NULL; |
1008 | put_pid(info->pid); | ||
875 | create_evict_fence_fail: | 1009 | create_evict_fence_fail: |
876 | mutex_destroy(&info->lock); | 1010 | mutex_destroy(&info->lock); |
877 | kfree(info); | 1011 | kfree(info); |
@@ -967,8 +1101,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, | |||
967 | /* Release per-process resources when last compute VM is destroyed */ | 1101 | /* Release per-process resources when last compute VM is destroyed */ |
968 | if (!process_info->n_vms) { | 1102 | if (!process_info->n_vms) { |
969 | WARN_ON(!list_empty(&process_info->kfd_bo_list)); | 1103 | WARN_ON(!list_empty(&process_info->kfd_bo_list)); |
1104 | WARN_ON(!list_empty(&process_info->userptr_valid_list)); | ||
1105 | WARN_ON(!list_empty(&process_info->userptr_inval_list)); | ||
970 | 1106 | ||
971 | dma_fence_put(&process_info->eviction_fence->base); | 1107 | dma_fence_put(&process_info->eviction_fence->base); |
1108 | cancel_delayed_work_sync(&process_info->restore_userptr_work); | ||
1109 | put_pid(process_info->pid); | ||
972 | mutex_destroy(&process_info->lock); | 1110 | mutex_destroy(&process_info->lock); |
973 | kfree(process_info); | 1111 | kfree(process_info); |
974 | } | 1112 | } |
@@ -1003,9 +1141,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( | |||
1003 | { | 1141 | { |
1004 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | 1142 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
1005 | struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; | 1143 | struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; |
1144 | uint64_t user_addr = 0; | ||
1006 | struct amdgpu_bo *bo; | 1145 | struct amdgpu_bo *bo; |
1007 | int byte_align; | 1146 | int byte_align; |
1008 | u32 alloc_domain; | 1147 | u32 domain, alloc_domain; |
1009 | u64 alloc_flags; | 1148 | u64 alloc_flags; |
1010 | uint32_t mapping_flags; | 1149 | uint32_t mapping_flags; |
1011 | int ret; | 1150 | int ret; |
@@ -1014,14 +1153,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( | |||
1014 | * Check on which domain to allocate BO | 1153 | * Check on which domain to allocate BO |
1015 | */ | 1154 | */ |
1016 | if (flags & ALLOC_MEM_FLAGS_VRAM) { | 1155 | if (flags & ALLOC_MEM_FLAGS_VRAM) { |
1017 | alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; | 1156 | domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; |
1018 | alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; | 1157 | alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; |
1019 | alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? | 1158 | alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? |
1020 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : | 1159 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : |
1021 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS; | 1160 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS; |
1022 | } else if (flags & ALLOC_MEM_FLAGS_GTT) { | 1161 | } else if (flags & ALLOC_MEM_FLAGS_GTT) { |
1023 | alloc_domain = AMDGPU_GEM_DOMAIN_GTT; | 1162 | domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; |
1024 | alloc_flags = 0; | 1163 | alloc_flags = 0; |
1164 | } else if (flags & ALLOC_MEM_FLAGS_USERPTR) { | ||
1165 | domain = AMDGPU_GEM_DOMAIN_GTT; | ||
1166 | alloc_domain = AMDGPU_GEM_DOMAIN_CPU; | ||
1167 | alloc_flags = 0; | ||
1168 | if (!offset || !*offset) | ||
1169 | return -EINVAL; | ||
1170 | user_addr = *offset; | ||
1025 | } else { | 1171 | } else { |
1026 | return -EINVAL; | 1172 | return -EINVAL; |
1027 | } | 1173 | } |
@@ -1078,18 +1224,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( | |||
1078 | } | 1224 | } |
1079 | bo->kfd_bo = *mem; | 1225 | bo->kfd_bo = *mem; |
1080 | (*mem)->bo = bo; | 1226 | (*mem)->bo = bo; |
1227 | if (user_addr) | ||
1228 | bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; | ||
1081 | 1229 | ||
1082 | (*mem)->va = va; | 1230 | (*mem)->va = va; |
1083 | (*mem)->domain = alloc_domain; | 1231 | (*mem)->domain = domain; |
1084 | (*mem)->mapped_to_gpu_memory = 0; | 1232 | (*mem)->mapped_to_gpu_memory = 0; |
1085 | (*mem)->process_info = avm->process_info; | 1233 | (*mem)->process_info = avm->process_info; |
1086 | add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info); | 1234 | add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); |
1235 | |||
1236 | if (user_addr) { | ||
1237 | ret = init_user_pages(*mem, current->mm, user_addr); | ||
1238 | if (ret) { | ||
1239 | mutex_lock(&avm->process_info->lock); | ||
1240 | list_del(&(*mem)->validate_list.head); | ||
1241 | mutex_unlock(&avm->process_info->lock); | ||
1242 | goto allocate_init_user_pages_failed; | ||
1243 | } | ||
1244 | } | ||
1087 | 1245 | ||
1088 | if (offset) | 1246 | if (offset) |
1089 | *offset = amdgpu_bo_mmap_offset(bo); | 1247 | *offset = amdgpu_bo_mmap_offset(bo); |
1090 | 1248 | ||
1091 | return 0; | 1249 | return 0; |
1092 | 1250 | ||
1251 | allocate_init_user_pages_failed: | ||
1252 | amdgpu_bo_unref(&bo); | ||
1253 | /* Don't unreserve system mem limit twice */ | ||
1254 | goto err_reserve_system_mem; | ||
1093 | err_bo_create: | 1255 | err_bo_create: |
1094 | unreserve_system_mem_limit(adev, size, alloc_domain); | 1256 | unreserve_system_mem_limit(adev, size, alloc_domain); |
1095 | err_reserve_system_mem: | 1257 | err_reserve_system_mem: |
@@ -1122,12 +1284,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( | |||
1122 | * be freed anyway | 1284 | * be freed anyway |
1123 | */ | 1285 | */ |
1124 | 1286 | ||
1287 | /* No more MMU notifiers */ | ||
1288 | amdgpu_mn_unregister(mem->bo); | ||
1289 | |||
1125 | /* Make sure restore workers don't access the BO any more */ | 1290 | /* Make sure restore workers don't access the BO any more */ |
1126 | bo_list_entry = &mem->validate_list; | 1291 | bo_list_entry = &mem->validate_list; |
1127 | mutex_lock(&process_info->lock); | 1292 | mutex_lock(&process_info->lock); |
1128 | list_del(&bo_list_entry->head); | 1293 | list_del(&bo_list_entry->head); |
1129 | mutex_unlock(&process_info->lock); | 1294 | mutex_unlock(&process_info->lock); |
1130 | 1295 | ||
1296 | /* Free user pages if necessary */ | ||
1297 | if (mem->user_pages) { | ||
1298 | pr_debug("%s: Freeing user_pages array\n", __func__); | ||
1299 | if (mem->user_pages[0]) | ||
1300 | release_pages(mem->user_pages, | ||
1301 | mem->bo->tbo.ttm->num_pages); | ||
1302 | kvfree(mem->user_pages); | ||
1303 | } | ||
1304 | |||
1131 | ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); | 1305 | ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); |
1132 | if (unlikely(ret)) | 1306 | if (unlikely(ret)) |
1133 | return ret; | 1307 | return ret; |
@@ -1173,21 +1347,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( | |||
1173 | struct kfd_bo_va_list *bo_va_entry = NULL; | 1347 | struct kfd_bo_va_list *bo_va_entry = NULL; |
1174 | struct kfd_bo_va_list *bo_va_entry_aql = NULL; | 1348 | struct kfd_bo_va_list *bo_va_entry_aql = NULL; |
1175 | unsigned long bo_size; | 1349 | unsigned long bo_size; |
1176 | 1350 | bool is_invalid_userptr = false; | |
1177 | /* Make sure restore is not running concurrently. | ||
1178 | */ | ||
1179 | mutex_lock(&mem->process_info->lock); | ||
1180 | |||
1181 | mutex_lock(&mem->lock); | ||
1182 | 1351 | ||
1183 | bo = mem->bo; | 1352 | bo = mem->bo; |
1184 | |||
1185 | if (!bo) { | 1353 | if (!bo) { |
1186 | pr_err("Invalid BO when mapping memory to GPU\n"); | 1354 | pr_err("Invalid BO when mapping memory to GPU\n"); |
1187 | ret = -EINVAL; | 1355 | return -EINVAL; |
1188 | goto out; | 1356 | } |
1357 | |||
1358 | /* Make sure restore is not running concurrently. Since we | ||
1359 | * don't map invalid userptr BOs, we rely on the next restore | ||
1360 | * worker to do the mapping | ||
1361 | */ | ||
1362 | mutex_lock(&mem->process_info->lock); | ||
1363 | |||
1364 | /* Lock mmap-sem. If we find an invalid userptr BO, we can be | ||
1365 | * sure that the MMU notifier is no longer running | ||
1366 | * concurrently and the queues are actually stopped | ||
1367 | */ | ||
1368 | if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { | ||
1369 | down_write(¤t->mm->mmap_sem); | ||
1370 | is_invalid_userptr = atomic_read(&mem->invalid); | ||
1371 | up_write(¤t->mm->mmap_sem); | ||
1189 | } | 1372 | } |
1190 | 1373 | ||
1374 | mutex_lock(&mem->lock); | ||
1375 | |||
1191 | domain = mem->domain; | 1376 | domain = mem->domain; |
1192 | bo_size = bo->tbo.mem.size; | 1377 | bo_size = bo->tbo.mem.size; |
1193 | 1378 | ||
@@ -1200,6 +1385,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( | |||
1200 | if (unlikely(ret)) | 1385 | if (unlikely(ret)) |
1201 | goto out; | 1386 | goto out; |
1202 | 1387 | ||
1388 | /* Userptr can be marked as "not invalid", but not actually be | ||
1389 | * validated yet (still in the system domain). In that case | ||
1390 | * the queues are still stopped and we can leave mapping for | ||
1391 | * the next restore worker | ||
1392 | */ | ||
1393 | if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) | ||
1394 | is_invalid_userptr = true; | ||
1395 | |||
1203 | if (check_if_add_bo_to_vm(avm, mem)) { | 1396 | if (check_if_add_bo_to_vm(avm, mem)) { |
1204 | ret = add_bo_to_vm(adev, mem, avm, false, | 1397 | ret = add_bo_to_vm(adev, mem, avm, false, |
1205 | &bo_va_entry); | 1398 | &bo_va_entry); |
@@ -1217,7 +1410,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( | |||
1217 | goto add_bo_to_vm_failed; | 1410 | goto add_bo_to_vm_failed; |
1218 | } | 1411 | } |
1219 | 1412 | ||
1220 | if (mem->mapped_to_gpu_memory == 0) { | 1413 | if (mem->mapped_to_gpu_memory == 0 && |
1414 | !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { | ||
1221 | /* Validate BO only once. The eviction fence gets added to BO | 1415 | /* Validate BO only once. The eviction fence gets added to BO |
1222 | * the first time it is mapped. Validate will wait for all | 1416 | * the first time it is mapped. Validate will wait for all |
1223 | * background evictions to complete. | 1417 | * background evictions to complete. |
@@ -1235,7 +1429,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( | |||
1235 | entry->va, entry->va + bo_size, | 1429 | entry->va, entry->va + bo_size, |
1236 | entry); | 1430 | entry); |
1237 | 1431 | ||
1238 | ret = map_bo_to_gpuvm(adev, entry, ctx.sync); | 1432 | ret = map_bo_to_gpuvm(adev, entry, ctx.sync, |
1433 | is_invalid_userptr); | ||
1239 | if (ret) { | 1434 | if (ret) { |
1240 | pr_err("Failed to map radeon bo to gpuvm\n"); | 1435 | pr_err("Failed to map radeon bo to gpuvm\n"); |
1241 | goto map_bo_to_gpuvm_failed; | 1436 | goto map_bo_to_gpuvm_failed; |
@@ -1418,13 +1613,337 @@ bo_reserve_failed: | |||
1418 | return ret; | 1613 | return ret; |
1419 | } | 1614 | } |
1420 | 1615 | ||
1616 | /* Evict a userptr BO by stopping the queues if necessary | ||
1617 | * | ||
1618 | * Runs in MMU notifier, may be in RECLAIM_FS context. This means it | ||
1619 | * cannot do any memory allocations, and cannot take any locks that | ||
1620 | * are held elsewhere while allocating memory. Therefore this is as | ||
1621 | * simple as possible, using atomic counters. | ||
1622 | * | ||
1623 | * It doesn't do anything to the BO itself. The real work happens in | ||
1624 | * restore, where we get updated page addresses. This function only | ||
1625 | * ensures that GPU access to the BO is stopped. | ||
1626 | */ | ||
1421 | int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, | 1627 | int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, |
1422 | struct mm_struct *mm) | 1628 | struct mm_struct *mm) |
1423 | { | 1629 | { |
1424 | /* TODO */ | 1630 | struct amdkfd_process_info *process_info = mem->process_info; |
1631 | int invalid, evicted_bos; | ||
1632 | int r = 0; | ||
1633 | |||
1634 | invalid = atomic_inc_return(&mem->invalid); | ||
1635 | evicted_bos = atomic_inc_return(&process_info->evicted_bos); | ||
1636 | if (evicted_bos == 1) { | ||
1637 | /* First eviction, stop the queues */ | ||
1638 | r = kgd2kfd->quiesce_mm(mm); | ||
1639 | if (r) | ||
1640 | pr_err("Failed to quiesce KFD\n"); | ||
1641 | schedule_delayed_work(&process_info->restore_userptr_work, | ||
1642 | msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); | ||
1643 | } | ||
1644 | |||
1645 | return r; | ||
1646 | } | ||
1647 | |||
1648 | /* Update invalid userptr BOs | ||
1649 | * | ||
1650 | * Moves invalidated (evicted) userptr BOs from userptr_valid_list to | ||
1651 | * userptr_inval_list and updates user pages for all BOs that have | ||
1652 | * been invalidated since their last update. | ||
1653 | */ | ||
1654 | static int update_invalid_user_pages(struct amdkfd_process_info *process_info, | ||
1655 | struct mm_struct *mm) | ||
1656 | { | ||
1657 | struct kgd_mem *mem, *tmp_mem; | ||
1658 | struct amdgpu_bo *bo; | ||
1659 | struct ttm_operation_ctx ctx = { false, false }; | ||
1660 | int invalid, ret; | ||
1661 | |||
1662 | /* Move all invalidated BOs to the userptr_inval_list and | ||
1663 | * release their user pages by migration to the CPU domain | ||
1664 | */ | ||
1665 | list_for_each_entry_safe(mem, tmp_mem, | ||
1666 | &process_info->userptr_valid_list, | ||
1667 | validate_list.head) { | ||
1668 | if (!atomic_read(&mem->invalid)) | ||
1669 | continue; /* BO is still valid */ | ||
1670 | |||
1671 | bo = mem->bo; | ||
1672 | |||
1673 | if (amdgpu_bo_reserve(bo, true)) | ||
1674 | return -EAGAIN; | ||
1675 | amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); | ||
1676 | ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); | ||
1677 | amdgpu_bo_unreserve(bo); | ||
1678 | if (ret) { | ||
1679 | pr_err("%s: Failed to invalidate userptr BO\n", | ||
1680 | __func__); | ||
1681 | return -EAGAIN; | ||
1682 | } | ||
1683 | |||
1684 | list_move_tail(&mem->validate_list.head, | ||
1685 | &process_info->userptr_inval_list); | ||
1686 | } | ||
1687 | |||
1688 | if (list_empty(&process_info->userptr_inval_list)) | ||
1689 | return 0; /* All evicted userptr BOs were freed */ | ||
1690 | |||
1691 | /* Go through userptr_inval_list and update any invalid user_pages */ | ||
1692 | list_for_each_entry(mem, &process_info->userptr_inval_list, | ||
1693 | validate_list.head) { | ||
1694 | invalid = atomic_read(&mem->invalid); | ||
1695 | if (!invalid) | ||
1696 | /* BO hasn't been invalidated since the last | ||
1697 | * revalidation attempt. Keep its BO list. | ||
1698 | */ | ||
1699 | continue; | ||
1700 | |||
1701 | bo = mem->bo; | ||
1702 | |||
1703 | if (!mem->user_pages) { | ||
1704 | mem->user_pages = | ||
1705 | kvmalloc_array(bo->tbo.ttm->num_pages, | ||
1706 | sizeof(struct page *), | ||
1707 | GFP_KERNEL | __GFP_ZERO); | ||
1708 | if (!mem->user_pages) { | ||
1709 | pr_err("%s: Failed to allocate pages array\n", | ||
1710 | __func__); | ||
1711 | return -ENOMEM; | ||
1712 | } | ||
1713 | } else if (mem->user_pages[0]) { | ||
1714 | release_pages(mem->user_pages, bo->tbo.ttm->num_pages); | ||
1715 | } | ||
1716 | |||
1717 | /* Get updated user pages */ | ||
1718 | ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, | ||
1719 | mem->user_pages); | ||
1720 | if (ret) { | ||
1721 | mem->user_pages[0] = NULL; | ||
1722 | pr_info("%s: Failed to get user pages: %d\n", | ||
1723 | __func__, ret); | ||
1724 | /* Pretend it succeeded. It will fail later | ||
1725 | * with a VM fault if the GPU tries to access | ||
1726 | * it. Better than hanging indefinitely with | ||
1727 | * stalled user mode queues. | ||
1728 | */ | ||
1729 | } | ||
1730 | |||
1731 | /* Mark the BO as valid unless it was invalidated | ||
1732 | * again concurrently | ||
1733 | */ | ||
1734 | if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) | ||
1735 | return -EAGAIN; | ||
1736 | } | ||
1737 | |||
1425 | return 0; | 1738 | return 0; |
1426 | } | 1739 | } |
1427 | 1740 | ||
1741 | /* Validate invalid userptr BOs | ||
1742 | * | ||
1743 | * Validates BOs on the userptr_inval_list, and moves them back to the | ||
1744 | * userptr_valid_list. Also updates GPUVM page tables with new page | ||
1745 | * addresses and waits for the page table updates to complete. | ||
1746 | */ | ||
1747 | static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) | ||
1748 | { | ||
1749 | struct amdgpu_bo_list_entry *pd_bo_list_entries; | ||
1750 | struct list_head resv_list, duplicates; | ||
1751 | struct ww_acquire_ctx ticket; | ||
1752 | struct amdgpu_sync sync; | ||
1753 | |||
1754 | struct amdgpu_vm *peer_vm; | ||
1755 | struct kgd_mem *mem, *tmp_mem; | ||
1756 | struct amdgpu_bo *bo; | ||
1757 | struct ttm_operation_ctx ctx = { false, false }; | ||
1758 | int i, ret; | ||
1759 | |||
1760 | pd_bo_list_entries = kcalloc(process_info->n_vms, | ||
1761 | sizeof(struct amdgpu_bo_list_entry), | ||
1762 | GFP_KERNEL); | ||
1763 | if (!pd_bo_list_entries) { | ||
1764 | pr_err("%s: Failed to allocate PD BO list entries\n", __func__); | ||
1765 | return -ENOMEM; | ||
1766 | } | ||
1767 | |||
1768 | INIT_LIST_HEAD(&resv_list); | ||
1769 | INIT_LIST_HEAD(&duplicates); | ||
1770 | |||
1771 | /* Get all the page directory BOs that need to be reserved */ | ||
1772 | i = 0; | ||
1773 | list_for_each_entry(peer_vm, &process_info->vm_list_head, | ||
1774 | vm_list_node) | ||
1775 | amdgpu_vm_get_pd_bo(peer_vm, &resv_list, | ||
1776 | &pd_bo_list_entries[i++]); | ||
1777 | /* Add the userptr_inval_list entries to resv_list */ | ||
1778 | list_for_each_entry(mem, &process_info->userptr_inval_list, | ||
1779 | validate_list.head) { | ||
1780 | list_add_tail(&mem->resv_list.head, &resv_list); | ||
1781 | mem->resv_list.bo = mem->validate_list.bo; | ||
1782 | mem->resv_list.shared = mem->validate_list.shared; | ||
1783 | } | ||
1784 | |||
1785 | /* Reserve all BOs and page tables for validation */ | ||
1786 | ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); | ||
1787 | WARN(!list_empty(&duplicates), "Duplicates should be empty"); | ||
1788 | if (ret) | ||
1789 | goto out; | ||
1790 | |||
1791 | amdgpu_sync_create(&sync); | ||
1792 | |||
1793 | /* Avoid triggering eviction fences when unmapping invalid | ||
1794 | * userptr BOs (waits for all fences, doesn't use | ||
1795 | * FENCE_OWNER_VM) | ||
1796 | */ | ||
1797 | list_for_each_entry(peer_vm, &process_info->vm_list_head, | ||
1798 | vm_list_node) | ||
1799 | amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo, | ||
1800 | process_info->eviction_fence, | ||
1801 | NULL, NULL); | ||
1802 | |||
1803 | ret = process_validate_vms(process_info); | ||
1804 | if (ret) | ||
1805 | goto unreserve_out; | ||
1806 | |||
1807 | /* Validate BOs and update GPUVM page tables */ | ||
1808 | list_for_each_entry_safe(mem, tmp_mem, | ||
1809 | &process_info->userptr_inval_list, | ||
1810 | validate_list.head) { | ||
1811 | struct kfd_bo_va_list *bo_va_entry; | ||
1812 | |||
1813 | bo = mem->bo; | ||
1814 | |||
1815 | /* Copy pages array and validate the BO if we got user pages */ | ||
1816 | if (mem->user_pages[0]) { | ||
1817 | amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, | ||
1818 | mem->user_pages); | ||
1819 | amdgpu_ttm_placement_from_domain(bo, mem->domain); | ||
1820 | ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); | ||
1821 | if (ret) { | ||
1822 | pr_err("%s: failed to validate BO\n", __func__); | ||
1823 | goto unreserve_out; | ||
1824 | } | ||
1825 | } | ||
1826 | |||
1827 | /* Validate succeeded, now the BO owns the pages, free | ||
1828 | * our copy of the pointer array. Put this BO back on | ||
1829 | * the userptr_valid_list. If we need to revalidate | ||
1830 | * it, we need to start from scratch. | ||
1831 | */ | ||
1832 | kvfree(mem->user_pages); | ||
1833 | mem->user_pages = NULL; | ||
1834 | list_move_tail(&mem->validate_list.head, | ||
1835 | &process_info->userptr_valid_list); | ||
1836 | |||
1837 | /* Update mapping. If the BO was not validated | ||
1838 | * (because we couldn't get user pages), this will | ||
1839 | * clear the page table entries, which will result in | ||
1840 | * VM faults if the GPU tries to access the invalid | ||
1841 | * memory. | ||
1842 | */ | ||
1843 | list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) { | ||
1844 | if (!bo_va_entry->is_mapped) | ||
1845 | continue; | ||
1846 | |||
1847 | ret = update_gpuvm_pte((struct amdgpu_device *) | ||
1848 | bo_va_entry->kgd_dev, | ||
1849 | bo_va_entry, &sync); | ||
1850 | if (ret) { | ||
1851 | pr_err("%s: update PTE failed\n", __func__); | ||
1852 | /* make sure this gets validated again */ | ||
1853 | atomic_inc(&mem->invalid); | ||
1854 | goto unreserve_out; | ||
1855 | } | ||
1856 | } | ||
1857 | } | ||
1858 | |||
1859 | /* Update page directories */ | ||
1860 | ret = process_update_pds(process_info, &sync); | ||
1861 | |||
1862 | unreserve_out: | ||
1863 | list_for_each_entry(peer_vm, &process_info->vm_list_head, | ||
1864 | vm_list_node) | ||
1865 | amdgpu_bo_fence(peer_vm->root.base.bo, | ||
1866 | &process_info->eviction_fence->base, true); | ||
1867 | ttm_eu_backoff_reservation(&ticket, &resv_list); | ||
1868 | amdgpu_sync_wait(&sync, false); | ||
1869 | amdgpu_sync_free(&sync); | ||
1870 | out: | ||
1871 | kfree(pd_bo_list_entries); | ||
1872 | |||
1873 | return ret; | ||
1874 | } | ||
1875 | |||
1876 | /* Worker callback to restore evicted userptr BOs | ||
1877 | * | ||
1878 | * Tries to update and validate all userptr BOs. If successful and no | ||
1879 | * concurrent evictions happened, the queues are restarted. Otherwise, | ||
1880 | * reschedule for another attempt later. | ||
1881 | */ | ||
1882 | static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) | ||
1883 | { | ||
1884 | struct delayed_work *dwork = to_delayed_work(work); | ||
1885 | struct amdkfd_process_info *process_info = | ||
1886 | container_of(dwork, struct amdkfd_process_info, | ||
1887 | restore_userptr_work); | ||
1888 | struct task_struct *usertask; | ||
1889 | struct mm_struct *mm; | ||
1890 | int evicted_bos; | ||
1891 | |||
1892 | evicted_bos = atomic_read(&process_info->evicted_bos); | ||
1893 | if (!evicted_bos) | ||
1894 | return; | ||
1895 | |||
1896 | /* Reference task and mm in case of concurrent process termination */ | ||
1897 | usertask = get_pid_task(process_info->pid, PIDTYPE_PID); | ||
1898 | if (!usertask) | ||
1899 | return; | ||
1900 | mm = get_task_mm(usertask); | ||
1901 | if (!mm) { | ||
1902 | put_task_struct(usertask); | ||
1903 | return; | ||
1904 | } | ||
1905 | |||
1906 | mutex_lock(&process_info->lock); | ||
1907 | |||
1908 | if (update_invalid_user_pages(process_info, mm)) | ||
1909 | goto unlock_out; | ||
1910 | /* userptr_inval_list can be empty if all evicted userptr BOs | ||
1911 | * have been freed. In that case there is nothing to validate | ||
1912 | * and we can just restart the queues. | ||
1913 | */ | ||
1914 | if (!list_empty(&process_info->userptr_inval_list)) { | ||
1915 | if (atomic_read(&process_info->evicted_bos) != evicted_bos) | ||
1916 | goto unlock_out; /* Concurrent eviction, try again */ | ||
1917 | |||
1918 | if (validate_invalid_user_pages(process_info)) | ||
1919 | goto unlock_out; | ||
1920 | } | ||
1921 | /* Final check for concurrent evicton and atomic update. If | ||
1922 | * another eviction happens after successful update, it will | ||
1923 | * be a first eviction that calls quiesce_mm. The eviction | ||
1924 | * reference counting inside KFD will handle this case. | ||
1925 | */ | ||
1926 | if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) != | ||
1927 | evicted_bos) | ||
1928 | goto unlock_out; | ||
1929 | evicted_bos = 0; | ||
1930 | if (kgd2kfd->resume_mm(mm)) { | ||
1931 | pr_err("%s: Failed to resume KFD\n", __func__); | ||
1932 | /* No recovery from this failure. Probably the CP is | ||
1933 | * hanging. No point trying again. | ||
1934 | */ | ||
1935 | } | ||
1936 | unlock_out: | ||
1937 | mutex_unlock(&process_info->lock); | ||
1938 | mmput(mm); | ||
1939 | put_task_struct(usertask); | ||
1940 | |||
1941 | /* If validation failed, reschedule another attempt */ | ||
1942 | if (evicted_bos) | ||
1943 | schedule_delayed_work(&process_info->restore_userptr_work, | ||
1944 | msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); | ||
1945 | } | ||
1946 | |||
1428 | /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given | 1947 | /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given |
1429 | * KFD process identified by process_info | 1948 | * KFD process identified by process_info |
1430 | * | 1949 | * |