diff options
author | Dave Airlie <airlied@redhat.com> | 2018-03-13 21:06:38 -0400 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2018-03-13 21:06:38 -0400 |
commit | 6fa7324ac5489ad43c4b6351355b869bc5458bef (patch) | |
tree | 97de1061f074d0a76c83d8cb364c67094a33a0fa | |
parent | 0b8eeac5c6ca6dcb19cce04bf8910006ac73dbd3 (diff) | |
parent | a11024457d348672b26b3d4581ed19c793399b48 (diff) |
Merge tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux into drm-next
Major points for this pull request:
- Add dGPU support for amdkfd initialization code and queue handling. It's
not complete support since the GPUVM part is missing (the under debate stuff).
- Enable PCIe atomics for dGPU if present
- Various adjustments to the amdgpu<-->amdkfd interface for dGPUs
- Refactor IOMMUv2 code to allow loading amdkfd without IOMMUv2 in the system
- Add HSA process eviction code in case of system memory pressure
- Various fixes and small changes
* tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux: (24 commits)
uapi: Fix type used in ioctl parameter structures
drm/amdkfd: Implement KFD process eviction/restore
drm/amdkfd: Add GPUVM virtual address space to PDD
drm/amdkfd: Remove unaligned memory access
drm/amdkfd: Centralize IOMMUv2 code and make it conditional
drm/amdgpu: Add submit IB function for KFD
drm/amdgpu: Add GPUVM memory management functions for KFD
drm/amdgpu: add amdgpu_sync_clone
drm/amdgpu: Update kgd2kfd_shared_resources for dGPU support
drm/amdgpu: Add KFD eviction fence
drm/amdgpu: Remove unused kfd2kgd interface
drm/amdgpu: Fix wrong mask in get_atc_vmid_pasid_mapping_pasid
drm/amdgpu: Fix header file dependencies
drm/amdgpu: Replace kgd_mem with amdgpu_bo for kernel pinned gtt mem
drm/amdgpu: remove useless BUG_ONs
drm/amdgpu: Enable KFD initialization on dGPUs
drm/amdkfd: Add dGPU device IDs and device info
drm/amdkfd: Add dGPU support to kernel_queue_init
drm/amdkfd: Add dGPU support to the MQD manager
drm/amdkfd: Add dGPU support to the device queue manager
...
41 files changed, 3757 insertions, 362 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 13c8ec11135a..dccae57985fe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -766,6 +766,8 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | |||
766 | F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 766 | F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |
767 | F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 767 | F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |
768 | F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 768 | F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |
769 | F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | ||
770 | F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | ||
769 | F: drivers/gpu/drm/amd/amdkfd/ | 771 | F: drivers/gpu/drm/amd/amdkfd/ |
770 | F: drivers/gpu/drm/amd/include/cik_structs.h | 772 | F: drivers/gpu/drm/amd/include/cik_structs.h |
771 | F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 773 | F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h |
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 5d101c4053e0..4edb9fd3cf47 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c | |||
@@ -171,6 +171,7 @@ void dma_fence_release(struct kref *kref) | |||
171 | 171 | ||
172 | trace_dma_fence_destroy(fence); | 172 | trace_dma_fence_destroy(fence); |
173 | 173 | ||
174 | /* Failed to signal before release, could be a refcounting issue */ | ||
174 | WARN_ON(!list_empty(&fence->cb_list)); | 175 | WARN_ON(!list_empty(&fence->cb_list)); |
175 | 176 | ||
176 | if (fence->ops->release) | 177 | if (fence->ops->release) |
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 353c937d947d..8522c2ea1f3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile | |||
@@ -129,6 +129,8 @@ amdgpu-y += \ | |||
129 | # add amdkfd interfaces | 129 | # add amdkfd interfaces |
130 | amdgpu-y += \ | 130 | amdgpu-y += \ |
131 | amdgpu_amdkfd.o \ | 131 | amdgpu_amdkfd.o \ |
132 | amdgpu_amdkfd_fence.o \ | ||
133 | amdgpu_amdkfd_gpuvm.o \ | ||
132 | amdgpu_amdkfd_gfx_v8.o | 134 | amdgpu_amdkfd_gfx_v8.o |
133 | 135 | ||
134 | # add cgs | 136 | # add cgs |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 450426dbed92..8a23aa8f9c73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | |||
@@ -30,6 +30,8 @@ | |||
30 | const struct kgd2kfd_calls *kgd2kfd; | 30 | const struct kgd2kfd_calls *kgd2kfd; |
31 | bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); | 31 | bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); |
32 | 32 | ||
33 | static const unsigned int compute_vmid_bitmap = 0xFF00; | ||
34 | |||
33 | int amdgpu_amdkfd_init(void) | 35 | int amdgpu_amdkfd_init(void) |
34 | { | 36 | { |
35 | int ret; | 37 | int ret; |
@@ -56,6 +58,7 @@ int amdgpu_amdkfd_init(void) | |||
56 | #else | 58 | #else |
57 | ret = -ENOENT; | 59 | ret = -ENOENT; |
58 | #endif | 60 | #endif |
61 | amdgpu_amdkfd_gpuvm_init_mem_limits(); | ||
59 | 62 | ||
60 | return ret; | 63 | return ret; |
61 | } | 64 | } |
@@ -78,10 +81,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) | |||
78 | switch (adev->asic_type) { | 81 | switch (adev->asic_type) { |
79 | #ifdef CONFIG_DRM_AMDGPU_CIK | 82 | #ifdef CONFIG_DRM_AMDGPU_CIK |
80 | case CHIP_KAVERI: | 83 | case CHIP_KAVERI: |
84 | case CHIP_HAWAII: | ||
81 | kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); | 85 | kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); |
82 | break; | 86 | break; |
83 | #endif | 87 | #endif |
84 | case CHIP_CARRIZO: | 88 | case CHIP_CARRIZO: |
89 | case CHIP_TONGA: | ||
90 | case CHIP_FIJI: | ||
91 | case CHIP_POLARIS10: | ||
92 | case CHIP_POLARIS11: | ||
85 | kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); | 93 | kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); |
86 | break; | 94 | break; |
87 | default: | 95 | default: |
@@ -132,9 +140,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) | |||
132 | int last_valid_bit; | 140 | int last_valid_bit; |
133 | if (adev->kfd) { | 141 | if (adev->kfd) { |
134 | struct kgd2kfd_shared_resources gpu_resources = { | 142 | struct kgd2kfd_shared_resources gpu_resources = { |
135 | .compute_vmid_bitmap = 0xFF00, | 143 | .compute_vmid_bitmap = compute_vmid_bitmap, |
136 | .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, | 144 | .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, |
137 | .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe | 145 | .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, |
146 | .gpuvm_size = min(adev->vm_manager.max_pfn | ||
147 | << AMDGPU_GPU_PAGE_SHIFT, | ||
148 | AMDGPU_VA_HOLE_START), | ||
149 | .drm_render_minor = adev->ddev->render->index | ||
138 | }; | 150 | }; |
139 | 151 | ||
140 | /* this is going to have a few of the MSBs set that we need to | 152 | /* this is going to have a few of the MSBs set that we need to |
@@ -204,19 +216,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, | |||
204 | void **cpu_ptr) | 216 | void **cpu_ptr) |
205 | { | 217 | { |
206 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | 218 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
207 | struct kgd_mem **mem = (struct kgd_mem **) mem_obj; | 219 | struct amdgpu_bo *bo = NULL; |
208 | int r; | 220 | int r; |
209 | 221 | uint64_t gpu_addr_tmp = 0; | |
210 | BUG_ON(kgd == NULL); | 222 | void *cpu_ptr_tmp = NULL; |
211 | BUG_ON(gpu_addr == NULL); | ||
212 | BUG_ON(cpu_ptr == NULL); | ||
213 | |||
214 | *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); | ||
215 | if ((*mem) == NULL) | ||
216 | return -ENOMEM; | ||
217 | 223 | ||
218 | r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, | 224 | r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, |
219 | AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); | 225 | AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &bo); |
220 | if (r) { | 226 | if (r) { |
221 | dev_err(adev->dev, | 227 | dev_err(adev->dev, |
222 | "failed to allocate BO for amdkfd (%d)\n", r); | 228 | "failed to allocate BO for amdkfd (%d)\n", r); |
@@ -224,54 +230,53 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, | |||
224 | } | 230 | } |
225 | 231 | ||
226 | /* map the buffer */ | 232 | /* map the buffer */ |
227 | r = amdgpu_bo_reserve((*mem)->bo, true); | 233 | r = amdgpu_bo_reserve(bo, true); |
228 | if (r) { | 234 | if (r) { |
229 | dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); | 235 | dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); |
230 | goto allocate_mem_reserve_bo_failed; | 236 | goto allocate_mem_reserve_bo_failed; |
231 | } | 237 | } |
232 | 238 | ||
233 | r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT, | 239 | r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, |
234 | &(*mem)->gpu_addr); | 240 | &gpu_addr_tmp); |
235 | if (r) { | 241 | if (r) { |
236 | dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); | 242 | dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); |
237 | goto allocate_mem_pin_bo_failed; | 243 | goto allocate_mem_pin_bo_failed; |
238 | } | 244 | } |
239 | *gpu_addr = (*mem)->gpu_addr; | ||
240 | 245 | ||
241 | r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); | 246 | r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp); |
242 | if (r) { | 247 | if (r) { |
243 | dev_err(adev->dev, | 248 | dev_err(adev->dev, |
244 | "(%d) failed to map bo to kernel for amdkfd\n", r); | 249 | "(%d) failed to map bo to kernel for amdkfd\n", r); |
245 | goto allocate_mem_kmap_bo_failed; | 250 | goto allocate_mem_kmap_bo_failed; |
246 | } | 251 | } |
247 | *cpu_ptr = (*mem)->cpu_ptr; | ||
248 | 252 | ||
249 | amdgpu_bo_unreserve((*mem)->bo); | 253 | *mem_obj = bo; |
254 | *gpu_addr = gpu_addr_tmp; | ||
255 | *cpu_ptr = cpu_ptr_tmp; | ||
256 | |||
257 | amdgpu_bo_unreserve(bo); | ||
250 | 258 | ||
251 | return 0; | 259 | return 0; |
252 | 260 | ||
253 | allocate_mem_kmap_bo_failed: | 261 | allocate_mem_kmap_bo_failed: |
254 | amdgpu_bo_unpin((*mem)->bo); | 262 | amdgpu_bo_unpin(bo); |
255 | allocate_mem_pin_bo_failed: | 263 | allocate_mem_pin_bo_failed: |
256 | amdgpu_bo_unreserve((*mem)->bo); | 264 | amdgpu_bo_unreserve(bo); |
257 | allocate_mem_reserve_bo_failed: | 265 | allocate_mem_reserve_bo_failed: |
258 | amdgpu_bo_unref(&(*mem)->bo); | 266 | amdgpu_bo_unref(&bo); |
259 | 267 | ||
260 | return r; | 268 | return r; |
261 | } | 269 | } |
262 | 270 | ||
263 | void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) | 271 | void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) |
264 | { | 272 | { |
265 | struct kgd_mem *mem = (struct kgd_mem *) mem_obj; | 273 | struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; |
266 | 274 | ||
267 | BUG_ON(mem == NULL); | 275 | amdgpu_bo_reserve(bo, true); |
268 | 276 | amdgpu_bo_kunmap(bo); | |
269 | amdgpu_bo_reserve(mem->bo, true); | 277 | amdgpu_bo_unpin(bo); |
270 | amdgpu_bo_kunmap(mem->bo); | 278 | amdgpu_bo_unreserve(bo); |
271 | amdgpu_bo_unpin(mem->bo); | 279 | amdgpu_bo_unref(&(bo)); |
272 | amdgpu_bo_unreserve(mem->bo); | ||
273 | amdgpu_bo_unref(&(mem->bo)); | ||
274 | kfree(mem); | ||
275 | } | 280 | } |
276 | 281 | ||
277 | void get_local_mem_info(struct kgd_dev *kgd, | 282 | void get_local_mem_info(struct kgd_dev *kgd, |
@@ -361,3 +366,68 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) | |||
361 | 366 | ||
362 | return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); | 367 | return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); |
363 | } | 368 | } |
369 | |||
370 | int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, | ||
371 | uint32_t vmid, uint64_t gpu_addr, | ||
372 | uint32_t *ib_cmd, uint32_t ib_len) | ||
373 | { | ||
374 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | ||
375 | struct amdgpu_job *job; | ||
376 | struct amdgpu_ib *ib; | ||
377 | struct amdgpu_ring *ring; | ||
378 | struct dma_fence *f = NULL; | ||
379 | int ret; | ||
380 | |||
381 | switch (engine) { | ||
382 | case KGD_ENGINE_MEC1: | ||
383 | ring = &adev->gfx.compute_ring[0]; | ||
384 | break; | ||
385 | case KGD_ENGINE_SDMA1: | ||
386 | ring = &adev->sdma.instance[0].ring; | ||
387 | break; | ||
388 | case KGD_ENGINE_SDMA2: | ||
389 | ring = &adev->sdma.instance[1].ring; | ||
390 | break; | ||
391 | default: | ||
392 | pr_err("Invalid engine in IB submission: %d\n", engine); | ||
393 | ret = -EINVAL; | ||
394 | goto err; | ||
395 | } | ||
396 | |||
397 | ret = amdgpu_job_alloc(adev, 1, &job, NULL); | ||
398 | if (ret) | ||
399 | goto err; | ||
400 | |||
401 | ib = &job->ibs[0]; | ||
402 | memset(ib, 0, sizeof(struct amdgpu_ib)); | ||
403 | |||
404 | ib->gpu_addr = gpu_addr; | ||
405 | ib->ptr = ib_cmd; | ||
406 | ib->length_dw = ib_len; | ||
407 | /* This works for NO_HWS. TODO: need to handle without knowing VMID */ | ||
408 | job->vmid = vmid; | ||
409 | |||
410 | ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); | ||
411 | if (ret) { | ||
412 | DRM_ERROR("amdgpu: failed to schedule IB.\n"); | ||
413 | goto err_ib_sched; | ||
414 | } | ||
415 | |||
416 | ret = dma_fence_wait(f, false); | ||
417 | |||
418 | err_ib_sched: | ||
419 | dma_fence_put(f); | ||
420 | amdgpu_job_free(job); | ||
421 | err: | ||
422 | return ret; | ||
423 | } | ||
424 | |||
425 | bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) | ||
426 | { | ||
427 | if (adev->kfd) { | ||
428 | if ((1 << vmid) & compute_vmid_bitmap) | ||
429 | return true; | ||
430 | } | ||
431 | |||
432 | return false; | ||
433 | } | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 2a519f9062ee..d7509b706b26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | |||
@@ -28,13 +28,89 @@ | |||
28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
29 | #include <linux/mmu_context.h> | 29 | #include <linux/mmu_context.h> |
30 | #include <kgd_kfd_interface.h> | 30 | #include <kgd_kfd_interface.h> |
31 | #include <drm/ttm/ttm_execbuf_util.h> | ||
32 | #include "amdgpu_sync.h" | ||
33 | #include "amdgpu_vm.h" | ||
34 | |||
35 | extern const struct kgd2kfd_calls *kgd2kfd; | ||
31 | 36 | ||
32 | struct amdgpu_device; | 37 | struct amdgpu_device; |
33 | 38 | ||
39 | struct kfd_bo_va_list { | ||
40 | struct list_head bo_list; | ||
41 | struct amdgpu_bo_va *bo_va; | ||
42 | void *kgd_dev; | ||
43 | bool is_mapped; | ||
44 | uint64_t va; | ||
45 | uint64_t pte_flags; | ||
46 | }; | ||
47 | |||
34 | struct kgd_mem { | 48 | struct kgd_mem { |
49 | struct mutex lock; | ||
35 | struct amdgpu_bo *bo; | 50 | struct amdgpu_bo *bo; |
36 | uint64_t gpu_addr; | 51 | struct list_head bo_va_list; |
37 | void *cpu_ptr; | 52 | /* protected by amdkfd_process_info.lock */ |
53 | struct ttm_validate_buffer validate_list; | ||
54 | struct ttm_validate_buffer resv_list; | ||
55 | uint32_t domain; | ||
56 | unsigned int mapped_to_gpu_memory; | ||
57 | uint64_t va; | ||
58 | |||
59 | uint32_t mapping_flags; | ||
60 | |||
61 | struct amdkfd_process_info *process_info; | ||
62 | |||
63 | struct amdgpu_sync sync; | ||
64 | |||
65 | bool aql_queue; | ||
66 | }; | ||
67 | |||
68 | /* KFD Memory Eviction */ | ||
69 | struct amdgpu_amdkfd_fence { | ||
70 | struct dma_fence base; | ||
71 | struct mm_struct *mm; | ||
72 | spinlock_t lock; | ||
73 | char timeline_name[TASK_COMM_LEN]; | ||
74 | }; | ||
75 | |||
76 | struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, | ||
77 | struct mm_struct *mm); | ||
78 | bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); | ||
79 | struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); | ||
80 | |||
81 | struct amdkfd_process_info { | ||
82 | /* List head of all VMs that belong to a KFD process */ | ||
83 | struct list_head vm_list_head; | ||
84 | /* List head for all KFD BOs that belong to a KFD process. */ | ||
85 | struct list_head kfd_bo_list; | ||
86 | /* Lock to protect kfd_bo_list */ | ||
87 | struct mutex lock; | ||
88 | |||
89 | /* Number of VMs */ | ||
90 | unsigned int n_vms; | ||
91 | /* Eviction Fence */ | ||
92 | struct amdgpu_amdkfd_fence *eviction_fence; | ||
93 | }; | ||
94 | |||
95 | /* struct amdkfd_vm - | ||
96 | * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs | ||
97 | * belonging to a KFD process. All the VMs belonging to the same process point | ||
98 | * to the same amdkfd_process_info. | ||
99 | */ | ||
100 | struct amdkfd_vm { | ||
101 | /* Keep base as the first parameter for pointer compatibility between | ||
102 | * amdkfd_vm and amdgpu_vm. | ||
103 | */ | ||
104 | struct amdgpu_vm base; | ||
105 | |||
106 | /* List node in amdkfd_process_info.vm_list_head*/ | ||
107 | struct list_head vm_list_node; | ||
108 | |||
109 | struct amdgpu_device *adev; | ||
110 | /* Points to the KFD process VM info*/ | ||
111 | struct amdkfd_process_info *process_info; | ||
112 | |||
113 | uint64_t pd_phys_addr; | ||
38 | }; | 114 | }; |
39 | 115 | ||
40 | int amdgpu_amdkfd_init(void); | 116 | int amdgpu_amdkfd_init(void); |
@@ -48,9 +124,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); | |||
48 | void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); | 124 | void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); |
49 | void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); | 125 | void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); |
50 | 126 | ||
127 | int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, | ||
128 | uint32_t vmid, uint64_t gpu_addr, | ||
129 | uint32_t *ib_cmd, uint32_t ib_len); | ||
130 | |||
51 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); | 131 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); |
52 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); | 132 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); |
53 | 133 | ||
134 | bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); | ||
135 | |||
54 | /* Shared API */ | 136 | /* Shared API */ |
55 | int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, | 137 | int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, |
56 | void **mem_obj, uint64_t *gpu_addr, | 138 | void **mem_obj, uint64_t *gpu_addr, |
@@ -79,4 +161,30 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); | |||
79 | valid; \ | 161 | valid; \ |
80 | }) | 162 | }) |
81 | 163 | ||
164 | /* GPUVM API */ | ||
165 | int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, | ||
166 | void **process_info, | ||
167 | struct dma_fence **ef); | ||
168 | void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); | ||
169 | uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); | ||
170 | int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( | ||
171 | struct kgd_dev *kgd, uint64_t va, uint64_t size, | ||
172 | void *vm, struct kgd_mem **mem, | ||
173 | uint64_t *offset, uint32_t flags); | ||
174 | int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( | ||
175 | struct kgd_dev *kgd, struct kgd_mem *mem); | ||
176 | int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( | ||
177 | struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); | ||
178 | int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( | ||
179 | struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); | ||
180 | int amdgpu_amdkfd_gpuvm_sync_memory( | ||
181 | struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); | ||
182 | int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, | ||
183 | struct kgd_mem *mem, void **kptr, uint64_t *size); | ||
184 | int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, | ||
185 | struct dma_fence **ef); | ||
186 | |||
187 | void amdgpu_amdkfd_gpuvm_init_mem_limits(void); | ||
188 | void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); | ||
189 | |||
82 | #endif /* AMDGPU_AMDKFD_H_INCLUDED */ | 190 | #endif /* AMDGPU_AMDKFD_H_INCLUDED */ |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c new file mode 100644 index 000000000000..2c14025e5e76 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | |||
@@ -0,0 +1,179 @@ | |||
1 | /* | ||
2 | * Copyright 2016-2018 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <linux/dma-fence.h> | ||
24 | #include <linux/spinlock.h> | ||
25 | #include <linux/atomic.h> | ||
26 | #include <linux/stacktrace.h> | ||
27 | #include <linux/sched.h> | ||
28 | #include <linux/slab.h> | ||
29 | #include <linux/sched/mm.h> | ||
30 | #include "amdgpu_amdkfd.h" | ||
31 | |||
32 | static const struct dma_fence_ops amdkfd_fence_ops; | ||
33 | static atomic_t fence_seq = ATOMIC_INIT(0); | ||
34 | |||
35 | /* Eviction Fence | ||
36 | * Fence helper functions to deal with KFD memory eviction. | ||
37 | * Big Idea - Since KFD submissions are done by user queues, a BO cannot be | ||
38 | * evicted unless all the user queues for that process are evicted. | ||
39 | * | ||
40 | * All the BOs in a process share an eviction fence. When process X wants | ||
41 | * to map VRAM memory but TTM can't find enough space, TTM will attempt to | ||
42 | * evict BOs from its LRU list. TTM checks if the BO is valuable to evict | ||
43 | * by calling ttm_bo_driver->eviction_valuable(). | ||
44 | * | ||
45 | * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs | ||
46 | * to process X. Otherwise, it will return true to indicate BO can be | ||
47 | * evicted by TTM. | ||
48 | * | ||
49 | * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue | ||
50 | * the evcition process for that BO by calling ttm_bo_evict --> amdgpu_bo_move | ||
51 | * --> amdgpu_copy_buffer(). This sets up job in GPU scheduler. | ||
52 | * | ||
53 | * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to | ||
54 | * nofity when the BO is free to move. fence_add_callback --> enable_signaling | ||
55 | * --> amdgpu_amdkfd_fence.enable_signaling | ||
56 | * | ||
57 | * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce | ||
58 | * user queues and signal fence. The work item will also start another delayed | ||
59 | * work item to restore BOs | ||
60 | */ | ||
61 | |||
62 | struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, | ||
63 | struct mm_struct *mm) | ||
64 | { | ||
65 | struct amdgpu_amdkfd_fence *fence; | ||
66 | |||
67 | fence = kzalloc(sizeof(*fence), GFP_KERNEL); | ||
68 | if (fence == NULL) | ||
69 | return NULL; | ||
70 | |||
71 | /* This reference gets released in amdkfd_fence_release */ | ||
72 | mmgrab(mm); | ||
73 | fence->mm = mm; | ||
74 | get_task_comm(fence->timeline_name, current); | ||
75 | spin_lock_init(&fence->lock); | ||
76 | |||
77 | dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock, | ||
78 | context, atomic_inc_return(&fence_seq)); | ||
79 | |||
80 | return fence; | ||
81 | } | ||
82 | |||
83 | struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) | ||
84 | { | ||
85 | struct amdgpu_amdkfd_fence *fence; | ||
86 | |||
87 | if (!f) | ||
88 | return NULL; | ||
89 | |||
90 | fence = container_of(f, struct amdgpu_amdkfd_fence, base); | ||
91 | if (fence && f->ops == &amdkfd_fence_ops) | ||
92 | return fence; | ||
93 | |||
94 | return NULL; | ||
95 | } | ||
96 | |||
97 | static const char *amdkfd_fence_get_driver_name(struct dma_fence *f) | ||
98 | { | ||
99 | return "amdgpu_amdkfd_fence"; | ||
100 | } | ||
101 | |||
102 | static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f) | ||
103 | { | ||
104 | struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); | ||
105 | |||
106 | return fence->timeline_name; | ||
107 | } | ||
108 | |||
109 | /** | ||
110 | * amdkfd_fence_enable_signaling - This gets called when TTM wants to evict | ||
111 | * a KFD BO and schedules a job to move the BO. | ||
112 | * If fence is already signaled return true. | ||
113 | * If fence is not signaled schedule a evict KFD process work item. | ||
114 | */ | ||
115 | static bool amdkfd_fence_enable_signaling(struct dma_fence *f) | ||
116 | { | ||
117 | struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); | ||
118 | |||
119 | if (!fence) | ||
120 | return false; | ||
121 | |||
122 | if (dma_fence_is_signaled(f)) | ||
123 | return true; | ||
124 | |||
125 | if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f)) | ||
126 | return true; | ||
127 | |||
128 | return false; | ||
129 | } | ||
130 | |||
131 | /** | ||
132 | * amdkfd_fence_release - callback that fence can be freed | ||
133 | * | ||
134 | * @fence: fence | ||
135 | * | ||
136 | * This function is called when the reference count becomes zero. | ||
137 | * Drops the mm_struct reference and RCU schedules freeing up the fence. | ||
138 | */ | ||
139 | static void amdkfd_fence_release(struct dma_fence *f) | ||
140 | { | ||
141 | struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); | ||
142 | |||
143 | /* Unconditionally signal the fence. The process is getting | ||
144 | * terminated. | ||
145 | */ | ||
146 | if (WARN_ON(!fence)) | ||
147 | return; /* Not an amdgpu_amdkfd_fence */ | ||
148 | |||
149 | mmdrop(fence->mm); | ||
150 | kfree_rcu(f, rcu); | ||
151 | } | ||
152 | |||
153 | /** | ||
154 | * amdkfd_fence_check_mm - Check if @mm is same as that of the fence @f | ||
155 | * if same return TRUE else return FALSE. | ||
156 | * | ||
157 | * @f: [IN] fence | ||
158 | * @mm: [IN] mm that needs to be verified | ||
159 | */ | ||
160 | bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) | ||
161 | { | ||
162 | struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); | ||
163 | |||
164 | if (!fence) | ||
165 | return false; | ||
166 | else if (fence->mm == mm) | ||
167 | return true; | ||
168 | |||
169 | return false; | ||
170 | } | ||
171 | |||
172 | static const struct dma_fence_ops amdkfd_fence_ops = { | ||
173 | .get_driver_name = amdkfd_fence_get_driver_name, | ||
174 | .get_timeline_name = amdkfd_fence_get_timeline_name, | ||
175 | .enable_signaling = amdkfd_fence_enable_signaling, | ||
176 | .signaled = NULL, | ||
177 | .wait = dma_fence_default_wait, | ||
178 | .release = amdkfd_fence_release, | ||
179 | }; | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index a9e6aea0e5f8..7485c376b90e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | |||
@@ -139,11 +139,14 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, | |||
139 | static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); | 139 | static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); |
140 | static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, | 140 | static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, |
141 | uint8_t vmid); | 141 | uint8_t vmid); |
142 | static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); | ||
143 | 142 | ||
144 | static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); | 143 | static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); |
145 | static void set_scratch_backing_va(struct kgd_dev *kgd, | 144 | static void set_scratch_backing_va(struct kgd_dev *kgd, |
146 | uint64_t va, uint32_t vmid); | 145 | uint64_t va, uint32_t vmid); |
146 | static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, | ||
147 | uint32_t page_table_base); | ||
148 | static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); | ||
149 | static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); | ||
147 | 150 | ||
148 | /* Because of REG_GET_FIELD() being used, we put this function in the | 151 | /* Because of REG_GET_FIELD() being used, we put this function in the |
149 | * asic specific file. | 152 | * asic specific file. |
@@ -196,12 +199,25 @@ static const struct kfd2kgd_calls kfd2kgd = { | |||
196 | .address_watch_get_offset = kgd_address_watch_get_offset, | 199 | .address_watch_get_offset = kgd_address_watch_get_offset, |
197 | .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, | 200 | .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, |
198 | .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, | 201 | .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, |
199 | .write_vmid_invalidate_request = write_vmid_invalidate_request, | ||
200 | .get_fw_version = get_fw_version, | 202 | .get_fw_version = get_fw_version, |
201 | .set_scratch_backing_va = set_scratch_backing_va, | 203 | .set_scratch_backing_va = set_scratch_backing_va, |
202 | .get_tile_config = get_tile_config, | 204 | .get_tile_config = get_tile_config, |
203 | .get_cu_info = get_cu_info, | 205 | .get_cu_info = get_cu_info, |
204 | .get_vram_usage = amdgpu_amdkfd_get_vram_usage | 206 | .get_vram_usage = amdgpu_amdkfd_get_vram_usage, |
207 | .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, | ||
208 | .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, | ||
209 | .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, | ||
210 | .set_vm_context_page_table_base = set_vm_context_page_table_base, | ||
211 | .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, | ||
212 | .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, | ||
213 | .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, | ||
214 | .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, | ||
215 | .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, | ||
216 | .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, | ||
217 | .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, | ||
218 | .invalidate_tlbs = invalidate_tlbs, | ||
219 | .invalidate_tlbs_vmid = invalidate_tlbs_vmid, | ||
220 | .submit_ib = amdgpu_amdkfd_submit_ib, | ||
205 | }; | 221 | }; |
206 | 222 | ||
207 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) | 223 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) |
@@ -787,14 +803,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, | |||
787 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | 803 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; |
788 | 804 | ||
789 | reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); | 805 | reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); |
790 | return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; | 806 | return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; |
791 | } | ||
792 | |||
793 | static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) | ||
794 | { | ||
795 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | ||
796 | |||
797 | WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); | ||
798 | } | 807 | } |
799 | 808 | ||
800 | static void set_scratch_backing_va(struct kgd_dev *kgd, | 809 | static void set_scratch_backing_va(struct kgd_dev *kgd, |
@@ -812,8 +821,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) | |||
812 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | 821 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; |
813 | const union amdgpu_firmware_header *hdr; | 822 | const union amdgpu_firmware_header *hdr; |
814 | 823 | ||
815 | BUG_ON(kgd == NULL); | ||
816 | |||
817 | switch (type) { | 824 | switch (type) { |
818 | case KGD_ENGINE_PFP: | 825 | case KGD_ENGINE_PFP: |
819 | hdr = (const union amdgpu_firmware_header *) | 826 | hdr = (const union amdgpu_firmware_header *) |
@@ -866,3 +873,50 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) | |||
866 | return hdr->common.ucode_version; | 873 | return hdr->common.ucode_version; |
867 | } | 874 | } |
868 | 875 | ||
876 | static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, | ||
877 | uint32_t page_table_base) | ||
878 | { | ||
879 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
880 | |||
881 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { | ||
882 | pr_err("trying to set page table base for wrong VMID\n"); | ||
883 | return; | ||
884 | } | ||
885 | WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); | ||
886 | } | ||
887 | |||
888 | static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) | ||
889 | { | ||
890 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | ||
891 | int vmid; | ||
892 | unsigned int tmp; | ||
893 | |||
894 | for (vmid = 0; vmid < 16; vmid++) { | ||
895 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) | ||
896 | continue; | ||
897 | |||
898 | tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); | ||
899 | if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && | ||
900 | (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { | ||
901 | WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); | ||
902 | RREG32(mmVM_INVALIDATE_RESPONSE); | ||
903 | break; | ||
904 | } | ||
905 | } | ||
906 | |||
907 | return 0; | ||
908 | } | ||
909 | |||
910 | static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) | ||
911 | { | ||
912 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | ||
913 | |||
914 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { | ||
915 | pr_err("non kfd vmid\n"); | ||
916 | return 0; | ||
917 | } | ||
918 | |||
919 | WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); | ||
920 | RREG32(mmVM_INVALIDATE_RESPONSE); | ||
921 | return 0; | ||
922 | } | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index b127259d7d85..7be453494423 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | |||
@@ -81,7 +81,6 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, | |||
81 | uint32_t queue_id); | 81 | uint32_t queue_id); |
82 | static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, | 82 | static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, |
83 | unsigned int utimeout); | 83 | unsigned int utimeout); |
84 | static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); | ||
85 | static int kgd_address_watch_disable(struct kgd_dev *kgd); | 84 | static int kgd_address_watch_disable(struct kgd_dev *kgd); |
86 | static int kgd_address_watch_execute(struct kgd_dev *kgd, | 85 | static int kgd_address_watch_execute(struct kgd_dev *kgd, |
87 | unsigned int watch_point_id, | 86 | unsigned int watch_point_id, |
@@ -99,10 +98,13 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, | |||
99 | uint8_t vmid); | 98 | uint8_t vmid); |
100 | static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, | 99 | static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, |
101 | uint8_t vmid); | 100 | uint8_t vmid); |
102 | static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); | ||
103 | static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); | 101 | static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); |
104 | static void set_scratch_backing_va(struct kgd_dev *kgd, | 102 | static void set_scratch_backing_va(struct kgd_dev *kgd, |
105 | uint64_t va, uint32_t vmid); | 103 | uint64_t va, uint32_t vmid); |
104 | static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, | ||
105 | uint32_t page_table_base); | ||
106 | static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); | ||
107 | static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); | ||
106 | 108 | ||
107 | /* Because of REG_GET_FIELD() being used, we put this function in the | 109 | /* Because of REG_GET_FIELD() being used, we put this function in the |
108 | * asic specific file. | 110 | * asic specific file. |
@@ -157,12 +159,25 @@ static const struct kfd2kgd_calls kfd2kgd = { | |||
157 | get_atc_vmid_pasid_mapping_pasid, | 159 | get_atc_vmid_pasid_mapping_pasid, |
158 | .get_atc_vmid_pasid_mapping_valid = | 160 | .get_atc_vmid_pasid_mapping_valid = |
159 | get_atc_vmid_pasid_mapping_valid, | 161 | get_atc_vmid_pasid_mapping_valid, |
160 | .write_vmid_invalidate_request = write_vmid_invalidate_request, | ||
161 | .get_fw_version = get_fw_version, | 162 | .get_fw_version = get_fw_version, |
162 | .set_scratch_backing_va = set_scratch_backing_va, | 163 | .set_scratch_backing_va = set_scratch_backing_va, |
163 | .get_tile_config = get_tile_config, | 164 | .get_tile_config = get_tile_config, |
164 | .get_cu_info = get_cu_info, | 165 | .get_cu_info = get_cu_info, |
165 | .get_vram_usage = amdgpu_amdkfd_get_vram_usage | 166 | .get_vram_usage = amdgpu_amdkfd_get_vram_usage, |
167 | .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, | ||
168 | .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, | ||
169 | .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, | ||
170 | .set_vm_context_page_table_base = set_vm_context_page_table_base, | ||
171 | .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, | ||
172 | .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, | ||
173 | .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, | ||
174 | .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, | ||
175 | .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, | ||
176 | .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, | ||
177 | .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, | ||
178 | .invalidate_tlbs = invalidate_tlbs, | ||
179 | .invalidate_tlbs_vmid = invalidate_tlbs_vmid, | ||
180 | .submit_ib = amdgpu_amdkfd_submit_ib, | ||
166 | }; | 181 | }; |
167 | 182 | ||
168 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) | 183 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) |
@@ -704,14 +719,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, | |||
704 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | 719 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; |
705 | 720 | ||
706 | reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); | 721 | reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); |
707 | return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; | 722 | return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; |
708 | } | ||
709 | |||
710 | static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) | ||
711 | { | ||
712 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | ||
713 | |||
714 | WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); | ||
715 | } | 723 | } |
716 | 724 | ||
717 | static int kgd_address_watch_disable(struct kgd_dev *kgd) | 725 | static int kgd_address_watch_disable(struct kgd_dev *kgd) |
@@ -775,8 +783,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) | |||
775 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | 783 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; |
776 | const union amdgpu_firmware_header *hdr; | 784 | const union amdgpu_firmware_header *hdr; |
777 | 785 | ||
778 | BUG_ON(kgd == NULL); | ||
779 | |||
780 | switch (type) { | 786 | switch (type) { |
781 | case KGD_ENGINE_PFP: | 787 | case KGD_ENGINE_PFP: |
782 | hdr = (const union amdgpu_firmware_header *) | 788 | hdr = (const union amdgpu_firmware_header *) |
@@ -828,3 +834,51 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) | |||
828 | /* Only 12 bit in use*/ | 834 | /* Only 12 bit in use*/ |
829 | return hdr->common.ucode_version; | 835 | return hdr->common.ucode_version; |
830 | } | 836 | } |
837 | |||
838 | static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, | ||
839 | uint32_t page_table_base) | ||
840 | { | ||
841 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
842 | |||
843 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { | ||
844 | pr_err("trying to set page table base for wrong VMID\n"); | ||
845 | return; | ||
846 | } | ||
847 | WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); | ||
848 | } | ||
849 | |||
850 | static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) | ||
851 | { | ||
852 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | ||
853 | int vmid; | ||
854 | unsigned int tmp; | ||
855 | |||
856 | for (vmid = 0; vmid < 16; vmid++) { | ||
857 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) | ||
858 | continue; | ||
859 | |||
860 | tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); | ||
861 | if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && | ||
862 | (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { | ||
863 | WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); | ||
864 | RREG32(mmVM_INVALIDATE_RESPONSE); | ||
865 | break; | ||
866 | } | ||
867 | } | ||
868 | |||
869 | return 0; | ||
870 | } | ||
871 | |||
872 | static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) | ||
873 | { | ||
874 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | ||
875 | |||
876 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { | ||
877 | pr_err("non kfd vmid %d\n", vmid); | ||
878 | return -EINVAL; | ||
879 | } | ||
880 | |||
881 | WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); | ||
882 | RREG32(mmVM_INVALIDATE_RESPONSE); | ||
883 | return 0; | ||
884 | } | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c new file mode 100644 index 000000000000..e0371a9967b9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | |||
@@ -0,0 +1,1506 @@ | |||
1 | /* | ||
2 | * Copyright 2014-2018 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #define pr_fmt(fmt) "kfd2kgd: " fmt | ||
24 | |||
25 | #include <linux/list.h> | ||
26 | #include <drm/drmP.h> | ||
27 | #include "amdgpu_object.h" | ||
28 | #include "amdgpu_vm.h" | ||
29 | #include "amdgpu_amdkfd.h" | ||
30 | |||
31 | /* Special VM and GART address alignment needed for VI pre-Fiji due to | ||
32 | * a HW bug. | ||
33 | */ | ||
34 | #define VI_BO_SIZE_ALIGN (0x8000) | ||
35 | |||
36 | /* Impose limit on how much memory KFD can use */ | ||
37 | static struct { | ||
38 | uint64_t max_system_mem_limit; | ||
39 | int64_t system_mem_used; | ||
40 | spinlock_t mem_limit_lock; | ||
41 | } kfd_mem_limit; | ||
42 | |||
43 | /* Struct used for amdgpu_amdkfd_bo_validate */ | ||
44 | struct amdgpu_vm_parser { | ||
45 | uint32_t domain; | ||
46 | bool wait; | ||
47 | }; | ||
48 | |||
49 | static const char * const domain_bit_to_string[] = { | ||
50 | "CPU", | ||
51 | "GTT", | ||
52 | "VRAM", | ||
53 | "GDS", | ||
54 | "GWS", | ||
55 | "OA" | ||
56 | }; | ||
57 | |||
58 | #define domain_string(domain) domain_bit_to_string[ffs(domain)-1] | ||
59 | |||
60 | |||
61 | |||
62 | static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) | ||
63 | { | ||
64 | return (struct amdgpu_device *)kgd; | ||
65 | } | ||
66 | |||
67 | static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, | ||
68 | struct kgd_mem *mem) | ||
69 | { | ||
70 | struct kfd_bo_va_list *entry; | ||
71 | |||
72 | list_for_each_entry(entry, &mem->bo_va_list, bo_list) | ||
73 | if (entry->bo_va->base.vm == avm) | ||
74 | return false; | ||
75 | |||
76 | return true; | ||
77 | } | ||
78 | |||
79 | /* Set memory usage limits. Current, limits are | ||
80 | * System (kernel) memory - 3/8th System RAM | ||
81 | */ | ||
82 | void amdgpu_amdkfd_gpuvm_init_mem_limits(void) | ||
83 | { | ||
84 | struct sysinfo si; | ||
85 | uint64_t mem; | ||
86 | |||
87 | si_meminfo(&si); | ||
88 | mem = si.totalram - si.totalhigh; | ||
89 | mem *= si.mem_unit; | ||
90 | |||
91 | spin_lock_init(&kfd_mem_limit.mem_limit_lock); | ||
92 | kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); | ||
93 | pr_debug("Kernel memory limit %lluM\n", | ||
94 | (kfd_mem_limit.max_system_mem_limit >> 20)); | ||
95 | } | ||
96 | |||
97 | static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, | ||
98 | uint64_t size, u32 domain) | ||
99 | { | ||
100 | size_t acc_size; | ||
101 | int ret = 0; | ||
102 | |||
103 | acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, | ||
104 | sizeof(struct amdgpu_bo)); | ||
105 | |||
106 | spin_lock(&kfd_mem_limit.mem_limit_lock); | ||
107 | if (domain == AMDGPU_GEM_DOMAIN_GTT) { | ||
108 | if (kfd_mem_limit.system_mem_used + (acc_size + size) > | ||
109 | kfd_mem_limit.max_system_mem_limit) { | ||
110 | ret = -ENOMEM; | ||
111 | goto err_no_mem; | ||
112 | } | ||
113 | kfd_mem_limit.system_mem_used += (acc_size + size); | ||
114 | } | ||
115 | err_no_mem: | ||
116 | spin_unlock(&kfd_mem_limit.mem_limit_lock); | ||
117 | return ret; | ||
118 | } | ||
119 | |||
120 | static void unreserve_system_mem_limit(struct amdgpu_device *adev, | ||
121 | uint64_t size, u32 domain) | ||
122 | { | ||
123 | size_t acc_size; | ||
124 | |||
125 | acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, | ||
126 | sizeof(struct amdgpu_bo)); | ||
127 | |||
128 | spin_lock(&kfd_mem_limit.mem_limit_lock); | ||
129 | if (domain == AMDGPU_GEM_DOMAIN_GTT) | ||
130 | kfd_mem_limit.system_mem_used -= (acc_size + size); | ||
131 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, | ||
132 | "kfd system memory accounting unbalanced"); | ||
133 | |||
134 | spin_unlock(&kfd_mem_limit.mem_limit_lock); | ||
135 | } | ||
136 | |||
137 | void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) | ||
138 | { | ||
139 | spin_lock(&kfd_mem_limit.mem_limit_lock); | ||
140 | |||
141 | if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { | ||
142 | kfd_mem_limit.system_mem_used -= | ||
143 | (bo->tbo.acc_size + amdgpu_bo_size(bo)); | ||
144 | } | ||
145 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, | ||
146 | "kfd system memory accounting unbalanced"); | ||
147 | |||
148 | spin_unlock(&kfd_mem_limit.mem_limit_lock); | ||
149 | } | ||
150 | |||
151 | |||
152 | /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's | ||
153 | * reservation object. | ||
154 | * | ||
155 | * @bo: [IN] Remove eviction fence(s) from this BO | ||
156 | * @ef: [IN] If ef is specified, then this eviction fence is removed if it | ||
157 | * is present in the shared list. | ||
158 | * @ef_list: [OUT] Returns list of eviction fences. These fences are removed | ||
159 | * from BO's reservation object shared list. | ||
160 | * @ef_count: [OUT] Number of fences in ef_list. | ||
161 | * | ||
162 | * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be | ||
163 | * called to restore the eviction fences and to avoid memory leak. This is | ||
164 | * useful for shared BOs. | ||
165 | * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held. | ||
166 | */ | ||
167 | static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, | ||
168 | struct amdgpu_amdkfd_fence *ef, | ||
169 | struct amdgpu_amdkfd_fence ***ef_list, | ||
170 | unsigned int *ef_count) | ||
171 | { | ||
172 | struct reservation_object_list *fobj; | ||
173 | struct reservation_object *resv; | ||
174 | unsigned int i = 0, j = 0, k = 0, shared_count; | ||
175 | unsigned int count = 0; | ||
176 | struct amdgpu_amdkfd_fence **fence_list; | ||
177 | |||
178 | if (!ef && !ef_list) | ||
179 | return -EINVAL; | ||
180 | |||
181 | if (ef_list) { | ||
182 | *ef_list = NULL; | ||
183 | *ef_count = 0; | ||
184 | } | ||
185 | |||
186 | resv = bo->tbo.resv; | ||
187 | fobj = reservation_object_get_list(resv); | ||
188 | |||
189 | if (!fobj) | ||
190 | return 0; | ||
191 | |||
192 | preempt_disable(); | ||
193 | write_seqcount_begin(&resv->seq); | ||
194 | |||
195 | /* Go through all the shared fences in the reservation object. If | ||
196 | * ef is specified and it exists in the list, remove it and reduce the | ||
197 | * count. If ef is not specified, then get the count of eviction fences | ||
198 | * present. | ||
199 | */ | ||
200 | shared_count = fobj->shared_count; | ||
201 | for (i = 0; i < shared_count; ++i) { | ||
202 | struct dma_fence *f; | ||
203 | |||
204 | f = rcu_dereference_protected(fobj->shared[i], | ||
205 | reservation_object_held(resv)); | ||
206 | |||
207 | if (ef) { | ||
208 | if (f->context == ef->base.context) { | ||
209 | dma_fence_put(f); | ||
210 | fobj->shared_count--; | ||
211 | } else { | ||
212 | RCU_INIT_POINTER(fobj->shared[j++], f); | ||
213 | } | ||
214 | } else if (to_amdgpu_amdkfd_fence(f)) | ||
215 | count++; | ||
216 | } | ||
217 | write_seqcount_end(&resv->seq); | ||
218 | preempt_enable(); | ||
219 | |||
220 | if (ef || !count) | ||
221 | return 0; | ||
222 | |||
223 | /* Alloc memory for count number of eviction fence pointers. Fill the | ||
224 | * ef_list array and ef_count | ||
225 | */ | ||
226 | fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *), | ||
227 | GFP_KERNEL); | ||
228 | if (!fence_list) | ||
229 | return -ENOMEM; | ||
230 | |||
231 | preempt_disable(); | ||
232 | write_seqcount_begin(&resv->seq); | ||
233 | |||
234 | j = 0; | ||
235 | for (i = 0; i < shared_count; ++i) { | ||
236 | struct dma_fence *f; | ||
237 | struct amdgpu_amdkfd_fence *efence; | ||
238 | |||
239 | f = rcu_dereference_protected(fobj->shared[i], | ||
240 | reservation_object_held(resv)); | ||
241 | |||
242 | efence = to_amdgpu_amdkfd_fence(f); | ||
243 | if (efence) { | ||
244 | fence_list[k++] = efence; | ||
245 | fobj->shared_count--; | ||
246 | } else { | ||
247 | RCU_INIT_POINTER(fobj->shared[j++], f); | ||
248 | } | ||
249 | } | ||
250 | |||
251 | write_seqcount_end(&resv->seq); | ||
252 | preempt_enable(); | ||
253 | |||
254 | *ef_list = fence_list; | ||
255 | *ef_count = k; | ||
256 | |||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | /* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's | ||
261 | * reservation object. | ||
262 | * | ||
263 | * @bo: [IN] Add eviction fences to this BO | ||
264 | * @ef_list: [IN] List of eviction fences to be added | ||
265 | * @ef_count: [IN] Number of fences in ef_list. | ||
266 | * | ||
267 | * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this | ||
268 | * function. | ||
269 | */ | ||
270 | static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo, | ||
271 | struct amdgpu_amdkfd_fence **ef_list, | ||
272 | unsigned int ef_count) | ||
273 | { | ||
274 | int i; | ||
275 | |||
276 | if (!ef_list || !ef_count) | ||
277 | return; | ||
278 | |||
279 | for (i = 0; i < ef_count; i++) { | ||
280 | amdgpu_bo_fence(bo, &ef_list[i]->base, true); | ||
281 | /* Re-adding the fence takes an additional reference. Drop that | ||
282 | * reference. | ||
283 | */ | ||
284 | dma_fence_put(&ef_list[i]->base); | ||
285 | } | ||
286 | |||
287 | kfree(ef_list); | ||
288 | } | ||
289 | |||
290 | static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, | ||
291 | bool wait) | ||
292 | { | ||
293 | struct ttm_operation_ctx ctx = { false, false }; | ||
294 | int ret; | ||
295 | |||
296 | if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm), | ||
297 | "Called with userptr BO")) | ||
298 | return -EINVAL; | ||
299 | |||
300 | amdgpu_ttm_placement_from_domain(bo, domain); | ||
301 | |||
302 | ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); | ||
303 | if (ret) | ||
304 | goto validate_fail; | ||
305 | if (wait) { | ||
306 | struct amdgpu_amdkfd_fence **ef_list; | ||
307 | unsigned int ef_count; | ||
308 | |||
309 | ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list, | ||
310 | &ef_count); | ||
311 | if (ret) | ||
312 | goto validate_fail; | ||
313 | |||
314 | ttm_bo_wait(&bo->tbo, false, false); | ||
315 | amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count); | ||
316 | } | ||
317 | |||
318 | validate_fail: | ||
319 | return ret; | ||
320 | } | ||
321 | |||
322 | static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) | ||
323 | { | ||
324 | struct amdgpu_vm_parser *p = param; | ||
325 | |||
326 | return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait); | ||
327 | } | ||
328 | |||
329 | /* vm_validate_pt_pd_bos - Validate page table and directory BOs | ||
330 | * | ||
331 | * Page directories are not updated here because huge page handling | ||
332 | * during page table updates can invalidate page directory entries | ||
333 | * again. Page directories are only updated after updating page | ||
334 | * tables. | ||
335 | */ | ||
336 | static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm) | ||
337 | { | ||
338 | struct amdgpu_bo *pd = vm->base.root.base.bo; | ||
339 | struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); | ||
340 | struct amdgpu_vm_parser param; | ||
341 | uint64_t addr, flags = AMDGPU_PTE_VALID; | ||
342 | int ret; | ||
343 | |||
344 | param.domain = AMDGPU_GEM_DOMAIN_VRAM; | ||
345 | param.wait = false; | ||
346 | |||
347 | ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate, | ||
348 | ¶m); | ||
349 | if (ret) { | ||
350 | pr_err("amdgpu: failed to validate PT BOs\n"); | ||
351 | return ret; | ||
352 | } | ||
353 | |||
354 | ret = amdgpu_amdkfd_validate(¶m, pd); | ||
355 | if (ret) { | ||
356 | pr_err("amdgpu: failed to validate PD\n"); | ||
357 | return ret; | ||
358 | } | ||
359 | |||
360 | addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo); | ||
361 | amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags); | ||
362 | vm->pd_phys_addr = addr; | ||
363 | |||
364 | if (vm->base.use_cpu_for_update) { | ||
365 | ret = amdgpu_bo_kmap(pd, NULL); | ||
366 | if (ret) { | ||
367 | pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret); | ||
368 | return ret; | ||
369 | } | ||
370 | } | ||
371 | |||
372 | return 0; | ||
373 | } | ||
374 | |||
375 | static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, | ||
376 | struct dma_fence *f) | ||
377 | { | ||
378 | int ret = amdgpu_sync_fence(adev, sync, f, false); | ||
379 | |||
380 | /* Sync objects can't handle multiple GPUs (contexts) updating | ||
381 | * sync->last_vm_update. Fortunately we don't need it for | ||
382 | * KFD's purposes, so we can just drop that fence. | ||
383 | */ | ||
384 | if (sync->last_vm_update) { | ||
385 | dma_fence_put(sync->last_vm_update); | ||
386 | sync->last_vm_update = NULL; | ||
387 | } | ||
388 | |||
389 | return ret; | ||
390 | } | ||
391 | |||
392 | static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) | ||
393 | { | ||
394 | struct amdgpu_bo *pd = vm->root.base.bo; | ||
395 | struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); | ||
396 | int ret; | ||
397 | |||
398 | ret = amdgpu_vm_update_directories(adev, vm); | ||
399 | if (ret) | ||
400 | return ret; | ||
401 | |||
402 | return sync_vm_fence(adev, sync, vm->last_update); | ||
403 | } | ||
404 | |||
405 | /* add_bo_to_vm - Add a BO to a VM | ||
406 | * | ||
407 | * Everything that needs to be done only once when a BO is first added | ||
408 | * to a VM. It can later be mapped and unmapped many times without | ||
409 | * repeating these steps. | ||
410 | * | ||
411 | * 1. Allocate and initialize BO VA entry data structure | ||
412 | * 2. Add BO to the VM | ||
413 | * 3. Determine ASIC-specific PTE flags | ||
414 | * 4. Alloc page tables and directories if needed | ||
415 | * 4a. Validate new page tables and directories | ||
416 | */ | ||
417 | static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, | ||
418 | struct amdgpu_vm *avm, bool is_aql, | ||
419 | struct kfd_bo_va_list **p_bo_va_entry) | ||
420 | { | ||
421 | int ret; | ||
422 | struct kfd_bo_va_list *bo_va_entry; | ||
423 | struct amdkfd_vm *kvm = container_of(avm, | ||
424 | struct amdkfd_vm, base); | ||
425 | struct amdgpu_bo *pd = avm->root.base.bo; | ||
426 | struct amdgpu_bo *bo = mem->bo; | ||
427 | uint64_t va = mem->va; | ||
428 | struct list_head *list_bo_va = &mem->bo_va_list; | ||
429 | unsigned long bo_size = bo->tbo.mem.size; | ||
430 | |||
431 | if (!va) { | ||
432 | pr_err("Invalid VA when adding BO to VM\n"); | ||
433 | return -EINVAL; | ||
434 | } | ||
435 | |||
436 | if (is_aql) | ||
437 | va += bo_size; | ||
438 | |||
439 | bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL); | ||
440 | if (!bo_va_entry) | ||
441 | return -ENOMEM; | ||
442 | |||
443 | pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, | ||
444 | va + bo_size, avm); | ||
445 | |||
446 | /* Add BO to VM internal data structures*/ | ||
447 | bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo); | ||
448 | if (!bo_va_entry->bo_va) { | ||
449 | ret = -EINVAL; | ||
450 | pr_err("Failed to add BO object to VM. ret == %d\n", | ||
451 | ret); | ||
452 | goto err_vmadd; | ||
453 | } | ||
454 | |||
455 | bo_va_entry->va = va; | ||
456 | bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev, | ||
457 | mem->mapping_flags); | ||
458 | bo_va_entry->kgd_dev = (void *)adev; | ||
459 | list_add(&bo_va_entry->bo_list, list_bo_va); | ||
460 | |||
461 | if (p_bo_va_entry) | ||
462 | *p_bo_va_entry = bo_va_entry; | ||
463 | |||
464 | /* Allocate new page tables if needed and validate | ||
465 | * them. Clearing of new page tables and validate need to wait | ||
466 | * on move fences. We don't want that to trigger the eviction | ||
467 | * fence, so remove it temporarily. | ||
468 | */ | ||
469 | amdgpu_amdkfd_remove_eviction_fence(pd, | ||
470 | kvm->process_info->eviction_fence, | ||
471 | NULL, NULL); | ||
472 | |||
473 | ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo)); | ||
474 | if (ret) { | ||
475 | pr_err("Failed to allocate pts, err=%d\n", ret); | ||
476 | goto err_alloc_pts; | ||
477 | } | ||
478 | |||
479 | ret = vm_validate_pt_pd_bos(kvm); | ||
480 | if (ret) { | ||
481 | pr_err("validate_pt_pd_bos() failed\n"); | ||
482 | goto err_alloc_pts; | ||
483 | } | ||
484 | |||
485 | /* Add the eviction fence back */ | ||
486 | amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); | ||
487 | |||
488 | return 0; | ||
489 | |||
490 | err_alloc_pts: | ||
491 | amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); | ||
492 | amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va); | ||
493 | list_del(&bo_va_entry->bo_list); | ||
494 | err_vmadd: | ||
495 | kfree(bo_va_entry); | ||
496 | return ret; | ||
497 | } | ||
498 | |||
499 | static void remove_bo_from_vm(struct amdgpu_device *adev, | ||
500 | struct kfd_bo_va_list *entry, unsigned long size) | ||
501 | { | ||
502 | pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n", | ||
503 | entry->va, | ||
504 | entry->va + size, entry); | ||
505 | amdgpu_vm_bo_rmv(adev, entry->bo_va); | ||
506 | list_del(&entry->bo_list); | ||
507 | kfree(entry); | ||
508 | } | ||
509 | |||
510 | static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, | ||
511 | struct amdkfd_process_info *process_info) | ||
512 | { | ||
513 | struct ttm_validate_buffer *entry = &mem->validate_list; | ||
514 | struct amdgpu_bo *bo = mem->bo; | ||
515 | |||
516 | INIT_LIST_HEAD(&entry->head); | ||
517 | entry->shared = true; | ||
518 | entry->bo = &bo->tbo; | ||
519 | mutex_lock(&process_info->lock); | ||
520 | list_add_tail(&entry->head, &process_info->kfd_bo_list); | ||
521 | mutex_unlock(&process_info->lock); | ||
522 | } | ||
523 | |||
524 | /* Reserving a BO and its page table BOs must happen atomically to | ||
525 | * avoid deadlocks. Some operations update multiple VMs at once. Track | ||
526 | * all the reservation info in a context structure. Optionally a sync | ||
527 | * object can track VM updates. | ||
528 | */ | ||
529 | struct bo_vm_reservation_context { | ||
530 | struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */ | ||
531 | unsigned int n_vms; /* Number of VMs reserved */ | ||
532 | struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */ | ||
533 | struct ww_acquire_ctx ticket; /* Reservation ticket */ | ||
534 | struct list_head list, duplicates; /* BO lists */ | ||
535 | struct amdgpu_sync *sync; /* Pointer to sync object */ | ||
536 | bool reserved; /* Whether BOs are reserved */ | ||
537 | }; | ||
538 | |||
539 | enum bo_vm_match { | ||
540 | BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */ | ||
541 | BO_VM_MAPPED, /* Match VMs where a BO is mapped */ | ||
542 | BO_VM_ALL, /* Match all VMs a BO was added to */ | ||
543 | }; | ||
544 | |||
545 | /** | ||
546 | * reserve_bo_and_vm - reserve a BO and a VM unconditionally. | ||
547 | * @mem: KFD BO structure. | ||
548 | * @vm: the VM to reserve. | ||
549 | * @ctx: the struct that will be used in unreserve_bo_and_vms(). | ||
550 | */ | ||
551 | static int reserve_bo_and_vm(struct kgd_mem *mem, | ||
552 | struct amdgpu_vm *vm, | ||
553 | struct bo_vm_reservation_context *ctx) | ||
554 | { | ||
555 | struct amdgpu_bo *bo = mem->bo; | ||
556 | int ret; | ||
557 | |||
558 | WARN_ON(!vm); | ||
559 | |||
560 | ctx->reserved = false; | ||
561 | ctx->n_vms = 1; | ||
562 | ctx->sync = &mem->sync; | ||
563 | |||
564 | INIT_LIST_HEAD(&ctx->list); | ||
565 | INIT_LIST_HEAD(&ctx->duplicates); | ||
566 | |||
567 | ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL); | ||
568 | if (!ctx->vm_pd) | ||
569 | return -ENOMEM; | ||
570 | |||
571 | ctx->kfd_bo.robj = bo; | ||
572 | ctx->kfd_bo.priority = 0; | ||
573 | ctx->kfd_bo.tv.bo = &bo->tbo; | ||
574 | ctx->kfd_bo.tv.shared = true; | ||
575 | ctx->kfd_bo.user_pages = NULL; | ||
576 | list_add(&ctx->kfd_bo.tv.head, &ctx->list); | ||
577 | |||
578 | amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); | ||
579 | |||
580 | ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, | ||
581 | false, &ctx->duplicates); | ||
582 | if (!ret) | ||
583 | ctx->reserved = true; | ||
584 | else { | ||
585 | pr_err("Failed to reserve buffers in ttm\n"); | ||
586 | kfree(ctx->vm_pd); | ||
587 | ctx->vm_pd = NULL; | ||
588 | } | ||
589 | |||
590 | return ret; | ||
591 | } | ||
592 | |||
593 | /** | ||
594 | * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally | ||
595 | * @mem: KFD BO structure. | ||
596 | * @vm: the VM to reserve. If NULL, then all VMs associated with the BO | ||
597 | * is used. Otherwise, a single VM associated with the BO. | ||
598 | * @map_type: the mapping status that will be used to filter the VMs. | ||
599 | * @ctx: the struct that will be used in unreserve_bo_and_vms(). | ||
600 | * | ||
601 | * Returns 0 for success, negative for failure. | ||
602 | */ | ||
603 | static int reserve_bo_and_cond_vms(struct kgd_mem *mem, | ||
604 | struct amdgpu_vm *vm, enum bo_vm_match map_type, | ||
605 | struct bo_vm_reservation_context *ctx) | ||
606 | { | ||
607 | struct amdgpu_bo *bo = mem->bo; | ||
608 | struct kfd_bo_va_list *entry; | ||
609 | unsigned int i; | ||
610 | int ret; | ||
611 | |||
612 | ctx->reserved = false; | ||
613 | ctx->n_vms = 0; | ||
614 | ctx->vm_pd = NULL; | ||
615 | ctx->sync = &mem->sync; | ||
616 | |||
617 | INIT_LIST_HEAD(&ctx->list); | ||
618 | INIT_LIST_HEAD(&ctx->duplicates); | ||
619 | |||
620 | list_for_each_entry(entry, &mem->bo_va_list, bo_list) { | ||
621 | if ((vm && vm != entry->bo_va->base.vm) || | ||
622 | (entry->is_mapped != map_type | ||
623 | && map_type != BO_VM_ALL)) | ||
624 | continue; | ||
625 | |||
626 | ctx->n_vms++; | ||
627 | } | ||
628 | |||
629 | if (ctx->n_vms != 0) { | ||
630 | ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), | ||
631 | GFP_KERNEL); | ||
632 | if (!ctx->vm_pd) | ||
633 | return -ENOMEM; | ||
634 | } | ||
635 | |||
636 | ctx->kfd_bo.robj = bo; | ||
637 | ctx->kfd_bo.priority = 0; | ||
638 | ctx->kfd_bo.tv.bo = &bo->tbo; | ||
639 | ctx->kfd_bo.tv.shared = true; | ||
640 | ctx->kfd_bo.user_pages = NULL; | ||
641 | list_add(&ctx->kfd_bo.tv.head, &ctx->list); | ||
642 | |||
643 | i = 0; | ||
644 | list_for_each_entry(entry, &mem->bo_va_list, bo_list) { | ||
645 | if ((vm && vm != entry->bo_va->base.vm) || | ||
646 | (entry->is_mapped != map_type | ||
647 | && map_type != BO_VM_ALL)) | ||
648 | continue; | ||
649 | |||
650 | amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list, | ||
651 | &ctx->vm_pd[i]); | ||
652 | i++; | ||
653 | } | ||
654 | |||
655 | ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, | ||
656 | false, &ctx->duplicates); | ||
657 | if (!ret) | ||
658 | ctx->reserved = true; | ||
659 | else | ||
660 | pr_err("Failed to reserve buffers in ttm.\n"); | ||
661 | |||
662 | if (ret) { | ||
663 | kfree(ctx->vm_pd); | ||
664 | ctx->vm_pd = NULL; | ||
665 | } | ||
666 | |||
667 | return ret; | ||
668 | } | ||
669 | |||
670 | /** | ||
671 | * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context | ||
672 | * @ctx: Reservation context to unreserve | ||
673 | * @wait: Optionally wait for a sync object representing pending VM updates | ||
674 | * @intr: Whether the wait is interruptible | ||
675 | * | ||
676 | * Also frees any resources allocated in | ||
677 | * reserve_bo_and_(cond_)vm(s). Returns the status from | ||
678 | * amdgpu_sync_wait. | ||
679 | */ | ||
680 | static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, | ||
681 | bool wait, bool intr) | ||
682 | { | ||
683 | int ret = 0; | ||
684 | |||
685 | if (wait) | ||
686 | ret = amdgpu_sync_wait(ctx->sync, intr); | ||
687 | |||
688 | if (ctx->reserved) | ||
689 | ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list); | ||
690 | kfree(ctx->vm_pd); | ||
691 | |||
692 | ctx->sync = NULL; | ||
693 | |||
694 | ctx->reserved = false; | ||
695 | ctx->vm_pd = NULL; | ||
696 | |||
697 | return ret; | ||
698 | } | ||
699 | |||
/* Unmap a BO from a GPUVM and clear the freed mappings.
 *
 * The process eviction fence is detached from the page directory for
 * the duration of the page-table update and re-attached afterwards
 * (see comment below for why). The fence of the final PT update is
 * added to @sync so callers can wait for the unmap to complete.
 *
 * Always returns 0. NOTE(review): the return value of sync_vm_fence()
 * is dropped here — presumably intentional, but worth confirming.
 */
static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
				struct kfd_bo_va_list *entry,
				struct amdgpu_sync *sync)
{
	struct amdgpu_bo_va *bo_va = entry->bo_va;
	struct amdgpu_vm *vm = bo_va->base.vm;
	struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base);
	struct amdgpu_bo *pd = vm->root.base.bo;

	/* Remove eviction fence from PD (and thereby from PTs too as
	 * they share the resv. object). Otherwise during PT update
	 * job (see amdgpu_vm_bo_update_mapping), eviction fence would
	 * get added to job->sync object and job execution would
	 * trigger the eviction fence.
	 */
	amdgpu_amdkfd_remove_eviction_fence(pd,
					    kvm->process_info->eviction_fence,
					    NULL, NULL);
	amdgpu_vm_bo_unmap(adev, bo_va, entry->va);

	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);

	/* Add the eviction fence back */
	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);

	sync_vm_fence(adev, sync, bo_va->last_pt_update);

	return 0;
}
729 | |||
730 | static int update_gpuvm_pte(struct amdgpu_device *adev, | ||
731 | struct kfd_bo_va_list *entry, | ||
732 | struct amdgpu_sync *sync) | ||
733 | { | ||
734 | int ret; | ||
735 | struct amdgpu_vm *vm; | ||
736 | struct amdgpu_bo_va *bo_va; | ||
737 | struct amdgpu_bo *bo; | ||
738 | |||
739 | bo_va = entry->bo_va; | ||
740 | vm = bo_va->base.vm; | ||
741 | bo = bo_va->base.bo; | ||
742 | |||
743 | /* Update the page tables */ | ||
744 | ret = amdgpu_vm_bo_update(adev, bo_va, false); | ||
745 | if (ret) { | ||
746 | pr_err("amdgpu_vm_bo_update failed\n"); | ||
747 | return ret; | ||
748 | } | ||
749 | |||
750 | return sync_vm_fence(adev, sync, bo_va->last_pt_update); | ||
751 | } | ||
752 | |||
753 | static int map_bo_to_gpuvm(struct amdgpu_device *adev, | ||
754 | struct kfd_bo_va_list *entry, struct amdgpu_sync *sync) | ||
755 | { | ||
756 | int ret; | ||
757 | |||
758 | /* Set virtual address for the allocation */ | ||
759 | ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0, | ||
760 | amdgpu_bo_size(entry->bo_va->base.bo), | ||
761 | entry->pte_flags); | ||
762 | if (ret) { | ||
763 | pr_err("Failed to map VA 0x%llx in vm. ret %d\n", | ||
764 | entry->va, ret); | ||
765 | return ret; | ||
766 | } | ||
767 | |||
768 | ret = update_gpuvm_pte(adev, entry, sync); | ||
769 | if (ret) { | ||
770 | pr_err("update_gpuvm_pte() failed\n"); | ||
771 | goto update_gpuvm_pte_failed; | ||
772 | } | ||
773 | |||
774 | return 0; | ||
775 | |||
776 | update_gpuvm_pte_failed: | ||
777 | unmap_bo_from_gpuvm(adev, entry, sync); | ||
778 | return ret; | ||
779 | } | ||
780 | |||
781 | static int process_validate_vms(struct amdkfd_process_info *process_info) | ||
782 | { | ||
783 | struct amdkfd_vm *peer_vm; | ||
784 | int ret; | ||
785 | |||
786 | list_for_each_entry(peer_vm, &process_info->vm_list_head, | ||
787 | vm_list_node) { | ||
788 | ret = vm_validate_pt_pd_bos(peer_vm); | ||
789 | if (ret) | ||
790 | return ret; | ||
791 | } | ||
792 | |||
793 | return 0; | ||
794 | } | ||
795 | |||
796 | static int process_update_pds(struct amdkfd_process_info *process_info, | ||
797 | struct amdgpu_sync *sync) | ||
798 | { | ||
799 | struct amdkfd_vm *peer_vm; | ||
800 | int ret; | ||
801 | |||
802 | list_for_each_entry(peer_vm, &process_info->vm_list_head, | ||
803 | vm_list_node) { | ||
804 | ret = vm_update_pds(&peer_vm->base, sync); | ||
805 | if (ret) | ||
806 | return ret; | ||
807 | } | ||
808 | |||
809 | return 0; | ||
810 | } | ||
811 | |||
812 | int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, | ||
813 | void **process_info, | ||
814 | struct dma_fence **ef) | ||
815 | { | ||
816 | int ret; | ||
817 | struct amdkfd_vm *new_vm; | ||
818 | struct amdkfd_process_info *info; | ||
819 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
820 | |||
821 | new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL); | ||
822 | if (!new_vm) | ||
823 | return -ENOMEM; | ||
824 | |||
825 | /* Initialize the VM context, allocate the page directory and zero it */ | ||
826 | ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0); | ||
827 | if (ret) { | ||
828 | pr_err("Failed init vm ret %d\n", ret); | ||
829 | goto vm_init_fail; | ||
830 | } | ||
831 | new_vm->adev = adev; | ||
832 | |||
833 | if (!*process_info) { | ||
834 | info = kzalloc(sizeof(*info), GFP_KERNEL); | ||
835 | if (!info) { | ||
836 | ret = -ENOMEM; | ||
837 | goto alloc_process_info_fail; | ||
838 | } | ||
839 | |||
840 | mutex_init(&info->lock); | ||
841 | INIT_LIST_HEAD(&info->vm_list_head); | ||
842 | INIT_LIST_HEAD(&info->kfd_bo_list); | ||
843 | |||
844 | info->eviction_fence = | ||
845 | amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), | ||
846 | current->mm); | ||
847 | if (!info->eviction_fence) { | ||
848 | pr_err("Failed to create eviction fence\n"); | ||
849 | goto create_evict_fence_fail; | ||
850 | } | ||
851 | |||
852 | *process_info = info; | ||
853 | *ef = dma_fence_get(&info->eviction_fence->base); | ||
854 | } | ||
855 | |||
856 | new_vm->process_info = *process_info; | ||
857 | |||
858 | mutex_lock(&new_vm->process_info->lock); | ||
859 | list_add_tail(&new_vm->vm_list_node, | ||
860 | &(new_vm->process_info->vm_list_head)); | ||
861 | new_vm->process_info->n_vms++; | ||
862 | mutex_unlock(&new_vm->process_info->lock); | ||
863 | |||
864 | *vm = (void *) new_vm; | ||
865 | |||
866 | pr_debug("Created process vm %p\n", *vm); | ||
867 | |||
868 | return ret; | ||
869 | |||
870 | create_evict_fence_fail: | ||
871 | mutex_destroy(&info->lock); | ||
872 | kfree(info); | ||
873 | alloc_process_info_fail: | ||
874 | amdgpu_vm_fini(adev, &new_vm->base); | ||
875 | vm_init_fail: | ||
876 | kfree(new_vm); | ||
877 | return ret; | ||
878 | |||
879 | } | ||
880 | |||
/* Destroy a GPUVM context previously created with
 * amdgpu_amdkfd_gpuvm_create_process_vm.
 *
 * Drops the eviction fence from the page directory, removes the VM
 * from the per-process list, and — when this was the process's last
 * VM — releases the shared process_info and its eviction fence.
 */
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm;
	struct amdgpu_vm *avm = &kfd_vm->base;
	struct amdgpu_bo *pd;
	struct amdkfd_process_info *process_info;

	if (WARN_ON(!kgd || !vm))
		return;

	pr_debug("Destroying process vm %p\n", vm);
	/* Release eviction fence from PD */
	pd = avm->root.base.bo;
	amdgpu_bo_reserve(pd, false);
	amdgpu_bo_fence(pd, NULL, false);
	amdgpu_bo_unreserve(pd);

	process_info = kfd_vm->process_info;

	mutex_lock(&process_info->lock);
	process_info->n_vms--;
	list_del(&kfd_vm->vm_list_node);
	mutex_unlock(&process_info->lock);

	/* Release per-process resources when the last VM goes away.
	 * All BOs should have been freed before the last VM is
	 * destroyed, hence the WARN on a non-empty kfd_bo_list.
	 */
	if (!process_info->n_vms) {
		WARN_ON(!list_empty(&process_info->kfd_bo_list));

		dma_fence_put(&process_info->eviction_fence->base);
		mutex_destroy(&process_info->lock);
		kfree(process_info);
	}

	/* Release the VM context */
	amdgpu_vm_fini(adev, avm);
	kfree(vm);
}
919 | |||
920 | uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) | ||
921 | { | ||
922 | struct amdkfd_vm *avm = (struct amdkfd_vm *)vm; | ||
923 | |||
924 | return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; | ||
925 | } | ||
926 | |||
/* Allocate a GPU BO on behalf of a KFD process.
 *
 * @kgd: device the allocation is made on
 * @va: GPU virtual address the buffer will later be mapped at
 * @size: requested size in bytes (halved for AQL queues, see below)
 * @vm: process VM (struct amdkfd_vm) the BO belongs to
 * @mem: out; newly allocated kgd_mem tracking structure
 * @offset: optional out; mmap offset for CPU access
 * @flags: ALLOC_MEM_FLAGS_* selecting domain and mapping attributes
 *
 * Returns 0 on success, negative errno on failure. On success the BO
 * is on the process's kfd_bo_list but not yet mapped into any VM.
 */
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		struct kgd_dev *kgd, uint64_t va, uint64_t size,
		void *vm, struct kgd_mem **mem,
		uint64_t *offset, uint32_t flags)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
	struct amdgpu_bo *bo;
	int byte_align;
	u32 alloc_domain;
	u64 alloc_flags;
	uint32_t mapping_flags;
	int ret;

	/*
	 * Check on which domain to allocate BO
	 */
	if (flags & ALLOC_MEM_FLAGS_VRAM) {
		alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
		alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
		alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
			AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	} else if (flags & ALLOC_MEM_FLAGS_GTT) {
		alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
		alloc_flags = 0;
	} else {
		return -EINVAL;
	}

	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	if (!*mem)
		return -ENOMEM;
	INIT_LIST_HEAD(&(*mem)->bo_va_list);
	mutex_init(&(*mem)->lock);
	(*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);

	/* Workaround for AQL queue wraparound bug. Map the same
	 * memory twice. That means we only actually allocate half
	 * the memory.
	 */
	if ((*mem)->aql_queue)
		size = size >> 1;

	/* Workaround for TLB bug on older VI chips */
	byte_align = (adev->family == AMDGPU_FAMILY_VI &&
			adev->asic_type != CHIP_FIJI &&
			adev->asic_type != CHIP_POLARIS10 &&
			adev->asic_type != CHIP_POLARIS11) ?
			VI_BO_SIZE_ALIGN : 1;

	/* Translate ALLOC_MEM_FLAGS_* into VM mapping flags used at
	 * map time (stored on the kgd_mem, not applied here).
	 */
	mapping_flags = AMDGPU_VM_PAGE_READABLE;
	if (flags & ALLOC_MEM_FLAGS_WRITABLE)
		mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
	if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
	if (flags & ALLOC_MEM_FLAGS_COHERENT)
		mapping_flags |= AMDGPU_VM_MTYPE_UC;
	else
		mapping_flags |= AMDGPU_VM_MTYPE_NC;
	(*mem)->mapping_flags = mapping_flags;

	amdgpu_sync_create(&(*mem)->sync);

	/* Account the allocation against the system memory limit */
	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
	if (ret) {
		pr_debug("Insufficient system memory\n");
		goto err_reserve_system_mem;
	}

	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
			va, size, domain_string(alloc_domain));

	ret = amdgpu_bo_create(adev, size, byte_align, false,
			       alloc_domain, alloc_flags, NULL, NULL, &bo);
	if (ret) {
		pr_debug("Failed to create BO on domain %s. ret %d\n",
				domain_string(alloc_domain), ret);
		goto err_bo_create;
	}
	/* Back-pointer lets amdgpu_ttm_bo_destroy() release the
	 * system-memory-limit reservation when the BO is freed.
	 */
	bo->kfd_bo = *mem;
	(*mem)->bo = bo;

	(*mem)->va = va;
	(*mem)->domain = alloc_domain;
	(*mem)->mapped_to_gpu_memory = 0;
	(*mem)->process_info = kfd_vm->process_info;
	add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info);

	if (offset)
		*offset = amdgpu_bo_mmap_offset(bo);

	return 0;

err_bo_create:
	unreserve_system_mem_limit(adev, size, alloc_domain);
err_reserve_system_mem:
	mutex_destroy(&(*mem)->lock);
	kfree(*mem);
	return ret;
}
1028 | |||
/* Free a BO allocated with amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu.
 *
 * Fails with -EBUSY while the BO is still mapped on any GPU.
 * Otherwise removes the BO from the restore-worker list and from
 * every VM it was added to, then releases the BO, the sync object and
 * the kgd_mem structure.
 */
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem)
{
	struct amdkfd_process_info *process_info = mem->process_info;
	unsigned long bo_size = mem->bo->tbo.mem.size;
	struct kfd_bo_va_list *entry, *tmp;
	struct bo_vm_reservation_context ctx;
	struct ttm_validate_buffer *bo_list_entry;
	int ret;

	mutex_lock(&mem->lock);

	if (mem->mapped_to_gpu_memory > 0) {
		pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
				mem->va, bo_size);
		mutex_unlock(&mem->lock);
		return -EBUSY;
	}

	mutex_unlock(&mem->lock);
	/* lock is not needed after this, since mem is unused and will
	 * be freed anyway
	 */

	/* Make sure restore workers don't access the BO any more */
	bo_list_entry = &mem->validate_list;
	mutex_lock(&process_info->lock);
	list_del(&bo_list_entry->head);
	mutex_unlock(&process_info->lock);

	/* Reserve the BO and all VMs it was ever added to, so the VM
	 * removal below can update them safely.
	 */
	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
	if (unlikely(ret))
		return ret;

	/* The eviction fence should be removed by the last unmap.
	 * TODO: Log an error condition if the bo still has the eviction fence
	 * attached
	 */
	amdgpu_amdkfd_remove_eviction_fence(mem->bo,
					    process_info->eviction_fence,
					    NULL, NULL);
	pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
		mem->va + bo_size * (1 + mem->aql_queue));

	/* Remove from VM internal data structures */
	list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list)
		remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
				entry, bo_size);

	ret = unreserve_bo_and_vms(&ctx, false, false);

	/* Free the sync object */
	amdgpu_sync_free(&mem->sync);

	/* Free the BO*/
	amdgpu_bo_unref(&mem->bo);
	mutex_destroy(&mem->lock);
	kfree(mem);

	return ret;
}
1090 | |||
/* Map a KFD BO into one GPU VM.
 *
 * Adds the BO to the VM on first use (twice for AQL queues — the
 * wraparound workaround), validates the BO on its first mapping (the
 * eviction fence is attached then), updates PTEs and the page
 * directory, and bumps mem->mapped_to_gpu_memory per new mapping.
 *
 * Lock order: process_info->lock (keeps the restore worker out), then
 * mem->lock. Returns 0 on success, negative errno on failure; on
 * failure any VM entries added by this call are removed again.
 */
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
	int ret;
	struct amdgpu_bo *bo;
	uint32_t domain;
	struct kfd_bo_va_list *entry;
	struct bo_vm_reservation_context ctx;
	struct kfd_bo_va_list *bo_va_entry = NULL;
	struct kfd_bo_va_list *bo_va_entry_aql = NULL;
	unsigned long bo_size;

	/* Make sure restore is not running concurrently.
	 */
	mutex_lock(&mem->process_info->lock);

	mutex_lock(&mem->lock);

	bo = mem->bo;

	if (!bo) {
		pr_err("Invalid BO when mapping memory to GPU\n");
		ret = -EINVAL;
		goto out;
	}

	domain = mem->domain;
	bo_size = bo->tbo.mem.size;

	pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
			mem->va,
			mem->va + bo_size * (1 + mem->aql_queue),
			vm, domain_string(domain));

	ret = reserve_bo_and_vm(mem, vm, &ctx);
	if (unlikely(ret))
		goto out;

	/* First mapping in this VM: create the bo_va entries
	 * (a second one for the AQL-queue alias mapping).
	 */
	if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) {
		ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false,
				&bo_va_entry);
		if (ret)
			goto add_bo_to_vm_failed;
		if (mem->aql_queue) {
			ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm,
					true, &bo_va_entry_aql);
			if (ret)
				goto add_bo_to_vm_failed_aql;
		}
	} else {
		ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm);
		if (unlikely(ret))
			goto add_bo_to_vm_failed;
	}

	if (mem->mapped_to_gpu_memory == 0) {
		/* Validate BO only once. The eviction fence gets added to BO
		 * the first time it is mapped. Validate will wait for all
		 * background evictions to complete.
		 */
		ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
		if (ret) {
			pr_debug("Validate failed\n");
			goto map_bo_to_gpuvm_failed;
		}
	}

	/* Map every not-yet-mapped entry that belongs to this VM */
	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
		if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
			pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
					entry->va, entry->va + bo_size,
					entry);

			ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
			if (ret) {
				pr_err("Failed to map radeon bo to gpuvm\n");
				goto map_bo_to_gpuvm_failed;
			}

			ret = vm_update_pds(vm, ctx.sync);
			if (ret) {
				pr_err("Failed to update page directories\n");
				goto map_bo_to_gpuvm_failed;
			}

			entry->is_mapped = true;
			mem->mapped_to_gpu_memory++;
			pr_debug("\t INC mapping count %d\n",
					mem->mapped_to_gpu_memory);
		}
	}

	/* Attach the eviction fence (not for userptr or pinned BOs) */
	if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count)
		amdgpu_bo_fence(bo,
				&kfd_vm->process_info->eviction_fence->base,
				true);
	ret = unreserve_bo_and_vms(&ctx, false, false);

	goto out;

map_bo_to_gpuvm_failed:
	if (bo_va_entry_aql)
		remove_bo_from_vm(adev, bo_va_entry_aql, bo_size);
add_bo_to_vm_failed_aql:
	if (bo_va_entry)
		remove_bo_from_vm(adev, bo_va_entry, bo_size);
add_bo_to_vm_failed:
	unreserve_bo_and_vms(&ctx, false, false);
out:
	mutex_unlock(&mem->process_info->lock);
	mutex_unlock(&mem->lock);
	return ret;
}
1206 | |||
/* Unmap a KFD BO from one GPU VM.
 *
 * Reserves the BO together with the VMs where it is currently mapped
 * (BO_VM_MAPPED filter); if none are found the BO was not mapped and
 * -EINVAL is returned. Each matching mapping is removed and
 * mem->mapped_to_gpu_memory is decremented. Once the BO is unmapped
 * everywhere, the eviction fence is detached so the BO becomes
 * evictable again.
 */
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdkfd_process_info *process_info =
		((struct amdkfd_vm *)vm)->process_info;
	unsigned long bo_size = mem->bo->tbo.mem.size;
	struct kfd_bo_va_list *entry;
	struct bo_vm_reservation_context ctx;
	int ret;

	mutex_lock(&mem->lock);

	ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
	if (unlikely(ret))
		goto out;
	/* If no VMs were reserved, it means the BO wasn't actually mapped */
	if (ctx.n_vms == 0) {
		ret = -EINVAL;
		goto unreserve_out;
	}

	ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm);
	if (unlikely(ret))
		goto unreserve_out;

	pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
		mem->va,
		mem->va + bo_size * (1 + mem->aql_queue),
		vm);

	/* Unmap every entry of this BO that is mapped in this VM */
	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
		if (entry->bo_va->base.vm == vm && entry->is_mapped) {
			pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
					entry->va,
					entry->va + bo_size,
					entry);

			ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
			if (ret == 0) {
				entry->is_mapped = false;
			} else {
				pr_err("failed to unmap VA 0x%llx\n",
						mem->va);
				goto unreserve_out;
			}

			mem->mapped_to_gpu_memory--;
			pr_debug("\t DEC mapping count %d\n",
					mem->mapped_to_gpu_memory);
		}
	}

	/* If BO is unmapped from all VMs, unfence it. It can be evicted if
	 * required.
	 */
	if (mem->mapped_to_gpu_memory == 0 &&
	    !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
		amdgpu_amdkfd_remove_eviction_fence(mem->bo,
						process_info->eviction_fence,
						    NULL, NULL);

unreserve_out:
	unreserve_bo_and_vms(&ctx, false, false);
out:
	mutex_unlock(&mem->lock);
	return ret;
}
1275 | |||
1276 | int amdgpu_amdkfd_gpuvm_sync_memory( | ||
1277 | struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) | ||
1278 | { | ||
1279 | struct amdgpu_sync sync; | ||
1280 | int ret; | ||
1281 | |||
1282 | amdgpu_sync_create(&sync); | ||
1283 | |||
1284 | mutex_lock(&mem->lock); | ||
1285 | amdgpu_sync_clone(&mem->sync, &sync); | ||
1286 | mutex_unlock(&mem->lock); | ||
1287 | |||
1288 | ret = amdgpu_sync_wait(&sync, intr); | ||
1289 | amdgpu_sync_free(&sync); | ||
1290 | return ret; | ||
1291 | } | ||
1292 | |||
/* Pin a GTT BO and map it into the kernel address space.
 *
 * @kptr: out; kernel virtual address of the mapping
 * @size: optional out; size of the BO
 *
 * Userptr BOs are rejected. The BO is removed from the restore list
 * and its eviction fence is detached, since a pinned kernel mapping
 * must never be evicted. Returns 0 on success, negative errno
 * otherwise; on failure all partial steps are rolled back.
 */
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
		struct kgd_mem *mem, void **kptr, uint64_t *size)
{
	int ret;
	struct amdgpu_bo *bo = mem->bo;

	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		pr_err("userptr can't be mapped to kernel\n");
		return -EINVAL;
	}

	/* delete kgd_mem from kfd_bo_list to avoid re-validating
	 * this BO in BO's restoring after eviction.
	 */
	mutex_lock(&mem->process_info->lock);

	ret = amdgpu_bo_reserve(bo, true);
	if (ret) {
		pr_err("Failed to reserve bo. ret %d\n", ret);
		goto bo_reserve_failed;
	}

	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
	if (ret) {
		pr_err("Failed to pin bo. ret %d\n", ret);
		goto pin_failed;
	}

	ret = amdgpu_bo_kmap(bo, kptr);
	if (ret) {
		pr_err("Failed to map bo to kernel. ret %d\n", ret);
		goto kmap_failed;
	}

	/* Pinned BO can't be evicted: drop the eviction fence and
	 * take the BO off the restore worker's validate list.
	 */
	amdgpu_amdkfd_remove_eviction_fence(
		bo, mem->process_info->eviction_fence, NULL, NULL);
	list_del_init(&mem->validate_list.head);

	if (size)
		*size = amdgpu_bo_size(bo);

	amdgpu_bo_unreserve(bo);

	mutex_unlock(&mem->process_info->lock);
	return 0;

kmap_failed:
	amdgpu_bo_unpin(bo);
pin_failed:
	amdgpu_bo_unreserve(bo);
bo_reserve_failed:
	mutex_unlock(&mem->process_info->lock);

	return ret;
}
1348 | |||
/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
 * KFD process identified by process_info
 *
 * @process_info: amdkfd_process_info of the KFD process
 * @ef: out; reference to the newly created eviction fence
 *
 * After memory eviction, restore thread calls this function. The function
 * should be called when the Process is still valid. BO restore involves -
 *
 * 1. Release old eviction fence and create new one
 * 2. Get two copies of PD BO list from all the VMs. Keep one copy as pd_list.
 * 3 Use the second PD list and kfd_bo_list to create a list (ctx.list) of
 *   BOs that need to be reserved.
 * 4. Reserve all the BOs
 * 5. Validate of PD and PT BOs.
 * 6. Validate all KFD BOs using kfd_bo_list and Map them and add new fence
 * 7. Add fence to all PD and PT BOs.
 * 8. Unreserve all BOs
 *
 * Returns 0 on success; on failure the caller (restore worker) is
 * expected to retry, as the "Try again" log messages indicate.
 */
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
{
	struct amdgpu_bo_list_entry *pd_bo_list;
	struct amdkfd_process_info *process_info = info;
	struct amdkfd_vm *peer_vm;
	struct kgd_mem *mem;
	struct bo_vm_reservation_context ctx;
	struct amdgpu_amdkfd_fence *new_fence;
	int ret = 0, i;
	struct list_head duplicate_save;
	struct amdgpu_sync sync_obj;

	INIT_LIST_HEAD(&duplicate_save);
	INIT_LIST_HEAD(&ctx.list);
	INIT_LIST_HEAD(&ctx.duplicates);

	/* One PD BO list entry per VM of the process */
	pd_bo_list = kcalloc(process_info->n_vms,
			     sizeof(struct amdgpu_bo_list_entry),
			     GFP_KERNEL);
	if (!pd_bo_list)
		return -ENOMEM;

	i = 0;
	mutex_lock(&process_info->lock);
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			vm_list_node)
		amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list,
				    &pd_bo_list[i++]);

	/* Reserve all BOs and page tables/directory. Add all BOs from
	 * kfd_bo_list to ctx.list
	 */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head) {

		list_add_tail(&mem->resv_list.head, &ctx.list);
		mem->resv_list.bo = mem->validate_list.bo;
		mem->resv_list.shared = mem->validate_list.shared;
	}

	ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
				     false, &duplicate_save);
	if (ret) {
		pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
		goto ttm_reserve_fail;
	}

	amdgpu_sync_create(&sync_obj);

	/* Validate PDs and PTs */
	ret = process_validate_vms(process_info);
	if (ret)
		goto validate_map_fail;

	/* Wait for PD/PTs validate to finish */
	/* FIXME: I think this isn't needed */
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		struct amdgpu_bo *bo = peer_vm->base.root.base.bo;

		ttm_bo_wait(&bo->tbo, false, false);
	}

	/* Validate BOs and map them to GPUVM (update VM page tables). */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head) {

		struct amdgpu_bo *bo = mem->bo;
		uint32_t domain = mem->domain;
		struct kfd_bo_va_list *bo_va_entry;

		ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
		if (ret) {
			pr_debug("Memory eviction: Validate BOs failed. Try again\n");
			goto validate_map_fail;
		}

		/* Re-establish every VM mapping of this BO */
		list_for_each_entry(bo_va_entry, &mem->bo_va_list,
				    bo_list) {
			ret = update_gpuvm_pte((struct amdgpu_device *)
					      bo_va_entry->kgd_dev,
					      bo_va_entry,
					      &sync_obj);
			if (ret) {
				pr_debug("Memory eviction: update PTE failed. Try again\n");
				goto validate_map_fail;
			}
		}
	}

	/* Update page directories */
	ret = process_update_pds(process_info, &sync_obj);
	if (ret) {
		pr_debug("Memory eviction: update PDs failed. Try again\n");
		goto validate_map_fail;
	}

	/* Wait for all collected PT/PD update fences */
	amdgpu_sync_wait(&sync_obj, false);

	/* Release old eviction fence and create new one, because fence only
	 * goes from unsignaled to signaled, fence cannot be reused.
	 * Use context and mm from the old fence.
	 */
	new_fence = amdgpu_amdkfd_fence_create(
				process_info->eviction_fence->base.context,
				process_info->eviction_fence->mm);
	if (!new_fence) {
		pr_err("Failed to create eviction fence\n");
		ret = -ENOMEM;
		goto validate_map_fail;
	}
	dma_fence_put(&process_info->eviction_fence->base);
	process_info->eviction_fence = new_fence;
	*ef = dma_fence_get(&new_fence->base);

	/* Wait for validate to finish and attach new eviction fence */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
		validate_list.head)
		ttm_bo_wait(&mem->bo->tbo, false, false);
	list_for_each_entry(mem, &process_info->kfd_bo_list,
		validate_list.head)
		amdgpu_bo_fence(mem->bo,
			&process_info->eviction_fence->base, true);

	/* Attach eviction fence to PD / PT BOs */
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		struct amdgpu_bo *bo = peer_vm->base.root.base.bo;

		amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
	}

validate_map_fail:
	ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
	amdgpu_sync_free(&sync_obj);
ttm_reserve_fail:
	mutex_unlock(&process_info->lock);
	kfree(pd_bo_list);
	return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 216799ccb545..9157745fce14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <drm/drm_cache.h> | 36 | #include <drm/drm_cache.h> |
37 | #include "amdgpu.h" | 37 | #include "amdgpu.h" |
38 | #include "amdgpu_trace.h" | 38 | #include "amdgpu_trace.h" |
39 | #include "amdgpu_amdkfd.h" | ||
39 | 40 | ||
40 | static bool amdgpu_need_backup(struct amdgpu_device *adev) | 41 | static bool amdgpu_need_backup(struct amdgpu_device *adev) |
41 | { | 42 | { |
@@ -54,6 +55,9 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) | |||
54 | struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); | 55 | struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); |
55 | struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); | 56 | struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); |
56 | 57 | ||
58 | if (bo->kfd_bo) | ||
59 | amdgpu_amdkfd_unreserve_system_memory_limit(bo); | ||
60 | |||
57 | amdgpu_bo_kunmap(bo); | 61 | amdgpu_bo_kunmap(bo); |
58 | 62 | ||
59 | drm_gem_object_release(&bo->gem_base); | 63 | drm_gem_object_release(&bo->gem_base); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 1cef944ef98d..d4dbfe1f842e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | |||
@@ -92,6 +92,8 @@ struct amdgpu_bo { | |||
92 | struct list_head mn_list; | 92 | struct list_head mn_list; |
93 | struct list_head shadow_list; | 93 | struct list_head shadow_list; |
94 | }; | 94 | }; |
95 | |||
96 | struct kgd_mem *kfd_bo; | ||
95 | }; | 97 | }; |
96 | 98 | ||
97 | static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) | 99 | static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 1d0d250cbfdf..1a5911882657 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | |||
@@ -26,6 +26,7 @@ | |||
26 | 26 | ||
27 | #include <drm/amdgpu_drm.h> | 27 | #include <drm/amdgpu_drm.h> |
28 | #include <drm/gpu_scheduler.h> | 28 | #include <drm/gpu_scheduler.h> |
29 | #include <drm/drm_print.h> | ||
29 | 30 | ||
30 | /* max number of rings */ | 31 | /* max number of rings */ |
31 | #define AMDGPU_MAX_RINGS 18 | 32 | #define AMDGPU_MAX_RINGS 18 |
@@ -35,8 +36,9 @@ | |||
35 | #define AMDGPU_MAX_UVD_ENC_RINGS 2 | 36 | #define AMDGPU_MAX_UVD_ENC_RINGS 2 |
36 | 37 | ||
37 | /* some special values for the owner field */ | 38 | /* some special values for the owner field */ |
38 | #define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul) | 39 | #define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul) |
39 | #define AMDGPU_FENCE_OWNER_VM ((void*)1ul) | 40 | #define AMDGPU_FENCE_OWNER_VM ((void *)1ul) |
41 | #define AMDGPU_FENCE_OWNER_KFD ((void *)2ul) | ||
40 | 42 | ||
41 | #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) | 43 | #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) |
42 | #define AMDGPU_FENCE_FLAG_INT (1 << 1) | 44 | #define AMDGPU_FENCE_FLAG_INT (1 << 1) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index df65c66dc956..2d6f5ec77a68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <drm/drmP.h> | 31 | #include <drm/drmP.h> |
32 | #include "amdgpu.h" | 32 | #include "amdgpu.h" |
33 | #include "amdgpu_trace.h" | 33 | #include "amdgpu_trace.h" |
34 | #include "amdgpu_amdkfd.h" | ||
34 | 35 | ||
35 | struct amdgpu_sync_entry { | 36 | struct amdgpu_sync_entry { |
36 | struct hlist_node node; | 37 | struct hlist_node node; |
@@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, | |||
85 | */ | 86 | */ |
86 | static void *amdgpu_sync_get_owner(struct dma_fence *f) | 87 | static void *amdgpu_sync_get_owner(struct dma_fence *f) |
87 | { | 88 | { |
88 | struct drm_sched_fence *s_fence = to_drm_sched_fence(f); | 89 | struct drm_sched_fence *s_fence; |
90 | struct amdgpu_amdkfd_fence *kfd_fence; | ||
91 | |||
92 | if (!f) | ||
93 | return AMDGPU_FENCE_OWNER_UNDEFINED; | ||
89 | 94 | ||
95 | s_fence = to_drm_sched_fence(f); | ||
90 | if (s_fence) | 96 | if (s_fence) |
91 | return s_fence->owner; | 97 | return s_fence->owner; |
92 | 98 | ||
99 | kfd_fence = to_amdgpu_amdkfd_fence(f); | ||
100 | if (kfd_fence) | ||
101 | return AMDGPU_FENCE_OWNER_KFD; | ||
102 | |||
93 | return AMDGPU_FENCE_OWNER_UNDEFINED; | 103 | return AMDGPU_FENCE_OWNER_UNDEFINED; |
94 | } | 104 | } |
95 | 105 | ||
@@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, | |||
204 | for (i = 0; i < flist->shared_count; ++i) { | 214 | for (i = 0; i < flist->shared_count; ++i) { |
205 | f = rcu_dereference_protected(flist->shared[i], | 215 | f = rcu_dereference_protected(flist->shared[i], |
206 | reservation_object_held(resv)); | 216 | reservation_object_held(resv)); |
217 | /* We only want to trigger KFD eviction fences on | ||
218 | * evict or move jobs. Skip KFD fences otherwise. | ||
219 | */ | ||
220 | fence_owner = amdgpu_sync_get_owner(f); | ||
221 | if (fence_owner == AMDGPU_FENCE_OWNER_KFD && | ||
222 | owner != AMDGPU_FENCE_OWNER_UNDEFINED) | ||
223 | continue; | ||
224 | |||
207 | if (amdgpu_sync_same_dev(adev, f)) { | 225 | if (amdgpu_sync_same_dev(adev, f)) { |
208 | /* VM updates are only interesting | 226 | /* VM updates are only interesting |
209 | * for other VM updates and moves. | 227 | * for other VM updates and moves. |
210 | */ | 228 | */ |
211 | fence_owner = amdgpu_sync_get_owner(f); | ||
212 | if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) && | 229 | if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) && |
213 | (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) && | 230 | (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) && |
214 | ((owner == AMDGPU_FENCE_OWNER_VM) != | 231 | ((owner == AMDGPU_FENCE_OWNER_VM) != |
@@ -305,6 +322,41 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit | |||
305 | return NULL; | 322 | return NULL; |
306 | } | 323 | } |
307 | 324 | ||
325 | /** | ||
326 | * amdgpu_sync_clone - clone a sync object | ||
327 | * | ||
328 | * @source: sync object to clone | ||
329 | * @clone: pointer to destination sync object | ||
330 | * | ||
331 | * Adds references to all unsignaled fences in @source to @clone. Also | ||
332 | * removes signaled fences from @source while at it. | ||
333 | */ | ||
334 | int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) | ||
335 | { | ||
336 | struct amdgpu_sync_entry *e; | ||
337 | struct hlist_node *tmp; | ||
338 | struct dma_fence *f; | ||
339 | int i, r; | ||
340 | |||
341 | hash_for_each_safe(source->fences, i, tmp, e, node) { | ||
342 | f = e->fence; | ||
343 | if (!dma_fence_is_signaled(f)) { | ||
344 | r = amdgpu_sync_fence(NULL, clone, f, e->explicit); | ||
345 | if (r) | ||
346 | return r; | ||
347 | } else { | ||
348 | hash_del(&e->node); | ||
349 | dma_fence_put(f); | ||
350 | kmem_cache_free(amdgpu_sync_slab, e); | ||
351 | } | ||
352 | } | ||
353 | |||
354 | dma_fence_put(clone->last_vm_update); | ||
355 | clone->last_vm_update = dma_fence_get(source->last_vm_update); | ||
356 | |||
357 | return 0; | ||
358 | } | ||
359 | |||
308 | int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) | 360 | int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) |
309 | { | 361 | { |
310 | struct amdgpu_sync_entry *e; | 362 | struct amdgpu_sync_entry *e; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 7aba38d5c9df..10cf23a57f17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | |||
@@ -50,6 +50,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, | |||
50 | struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, | 50 | struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, |
51 | struct amdgpu_ring *ring); | 51 | struct amdgpu_ring *ring); |
52 | struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit); | 52 | struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit); |
53 | int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); | ||
53 | int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); | 54 | int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); |
54 | void amdgpu_sync_free(struct amdgpu_sync *sync); | 55 | void amdgpu_sync_free(struct amdgpu_sync *sync); |
55 | int amdgpu_sync_init(void); | 56 | int amdgpu_sync_init(void); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 28c33d711bab..c2fae04d769a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include "amdgpu.h" | 46 | #include "amdgpu.h" |
47 | #include "amdgpu_object.h" | 47 | #include "amdgpu_object.h" |
48 | #include "amdgpu_trace.h" | 48 | #include "amdgpu_trace.h" |
49 | #include "amdgpu_amdkfd.h" | ||
49 | #include "bif/bif_4_1_d.h" | 50 | #include "bif/bif_4_1_d.h" |
50 | 51 | ||
51 | #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) | 52 | #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) |
@@ -258,6 +259,13 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) | |||
258 | { | 259 | { |
259 | struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); | 260 | struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); |
260 | 261 | ||
262 | /* | ||
263 | * Don't verify access for KFD BOs. They don't have a GEM | ||
264 | * object associated with them. | ||
265 | */ | ||
266 | if (abo->kfd_bo) | ||
267 | return 0; | ||
268 | |||
261 | if (amdgpu_ttm_tt_get_usermm(bo->ttm)) | 269 | if (amdgpu_ttm_tt_get_usermm(bo->ttm)) |
262 | return -EPERM; | 270 | return -EPERM; |
263 | return drm_vma_node_verify_access(&abo->gem_base.vma_node, | 271 | return drm_vma_node_verify_access(&abo->gem_base.vma_node, |
@@ -1171,6 +1179,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, | |||
1171 | { | 1179 | { |
1172 | unsigned long num_pages = bo->mem.num_pages; | 1180 | unsigned long num_pages = bo->mem.num_pages; |
1173 | struct drm_mm_node *node = bo->mem.mm_node; | 1181 | struct drm_mm_node *node = bo->mem.mm_node; |
1182 | struct reservation_object_list *flist; | ||
1183 | struct dma_fence *f; | ||
1184 | int i; | ||
1185 | |||
1186 | /* If bo is a KFD BO, check if the bo belongs to the current process. | ||
1187 | * If true, then return false as any KFD process needs all its BOs to | ||
1188 | * be resident to run successfully | ||
1189 | */ | ||
1190 | flist = reservation_object_get_list(bo->resv); | ||
1191 | if (flist) { | ||
1192 | for (i = 0; i < flist->shared_count; ++i) { | ||
1193 | f = rcu_dereference_protected(flist->shared[i], | ||
1194 | reservation_object_held(bo->resv)); | ||
1195 | if (amdkfd_fence_check_mm(f, current->mm)) | ||
1196 | return false; | ||
1197 | } | ||
1198 | } | ||
1174 | 1199 | ||
1175 | switch (bo->mem.mem_type) { | 1200 | switch (bo->mem.mem_type) { |
1176 | case TTM_PL_TT: | 1201 | case TTM_PL_TT: |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index fabf44b262be..e9841518343e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/kfifo.h> | 28 | #include <linux/kfifo.h> |
29 | #include <linux/rbtree.h> | 29 | #include <linux/rbtree.h> |
30 | #include <drm/gpu_scheduler.h> | 30 | #include <drm/gpu_scheduler.h> |
31 | #include <drm/drm_file.h> | ||
31 | 32 | ||
32 | #include "amdgpu_sync.h" | 33 | #include "amdgpu_sync.h" |
33 | #include "amdgpu_ring.h" | 34 | #include "amdgpu_ring.h" |
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index bc5a2945bd2b..ed2f06c9f346 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig | |||
@@ -4,6 +4,7 @@ | |||
4 | 4 | ||
5 | config HSA_AMD | 5 | config HSA_AMD |
6 | tristate "HSA kernel driver for AMD GPU devices" | 6 | tristate "HSA kernel driver for AMD GPU devices" |
7 | depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64 | 7 | depends on DRM_AMDGPU && X86_64 |
8 | imply AMD_IOMMU_V2 | ||
8 | help | 9 | help |
9 | Enable this if you want to use HSA features on AMD GPU devices. | 10 | Enable this if you want to use HSA features on AMD GPU devices. |
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index a317e76ffb5e..0d0242240c47 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile | |||
@@ -37,6 +37,10 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ | |||
37 | kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ | 37 | kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ |
38 | kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o | 38 | kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o |
39 | 39 | ||
40 | ifneq ($(CONFIG_AMD_IOMMU_V2),) | ||
41 | amdkfd-y += kfd_iommu.o | ||
42 | endif | ||
43 | |||
40 | amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o | 44 | amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o |
41 | 45 | ||
42 | obj-$(CONFIG_HSA_AMD) += amdkfd.o | 46 | obj-$(CONFIG_HSA_AMD) += amdkfd.o |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 62c3d9cd6ef1..6fe24964540b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | |||
@@ -901,7 +901,8 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep, | |||
901 | 901 | ||
902 | mutex_unlock(&p->mutex); | 902 | mutex_unlock(&p->mutex); |
903 | 903 | ||
904 | if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) | 904 | if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS && |
905 | pdd->qpd.vmid != 0) | ||
905 | dev->kfd2kgd->set_scratch_backing_va( | 906 | dev->kfd2kgd->set_scratch_backing_va( |
906 | dev->kgd, args->va_addr, pdd->qpd.vmid); | 907 | dev->kgd, args->va_addr, pdd->qpd.vmid); |
907 | 908 | ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 2bc2816767a7..7493f47e7fe1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c | |||
@@ -22,10 +22,10 @@ | |||
22 | 22 | ||
23 | #include <linux/pci.h> | 23 | #include <linux/pci.h> |
24 | #include <linux/acpi.h> | 24 | #include <linux/acpi.h> |
25 | #include <linux/amd-iommu.h> | ||
26 | #include "kfd_crat.h" | 25 | #include "kfd_crat.h" |
27 | #include "kfd_priv.h" | 26 | #include "kfd_priv.h" |
28 | #include "kfd_topology.h" | 27 | #include "kfd_topology.h" |
28 | #include "kfd_iommu.h" | ||
29 | 29 | ||
30 | /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. | 30 | /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. |
31 | * GPU processor ID are expressed with Bit[31]=1. | 31 | * GPU processor ID are expressed with Bit[31]=1. |
@@ -1037,15 +1037,11 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, | |||
1037 | struct crat_subtype_generic *sub_type_hdr; | 1037 | struct crat_subtype_generic *sub_type_hdr; |
1038 | struct crat_subtype_computeunit *cu; | 1038 | struct crat_subtype_computeunit *cu; |
1039 | struct kfd_cu_info cu_info; | 1039 | struct kfd_cu_info cu_info; |
1040 | struct amd_iommu_device_info iommu_info; | ||
1041 | int avail_size = *size; | 1040 | int avail_size = *size; |
1042 | uint32_t total_num_of_cu; | 1041 | uint32_t total_num_of_cu; |
1043 | int num_of_cache_entries = 0; | 1042 | int num_of_cache_entries = 0; |
1044 | int cache_mem_filled = 0; | 1043 | int cache_mem_filled = 0; |
1045 | int ret = 0; | 1044 | int ret = 0; |
1046 | const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | | ||
1047 | AMD_IOMMU_DEVICE_FLAG_PRI_SUP | | ||
1048 | AMD_IOMMU_DEVICE_FLAG_PASID_SUP; | ||
1049 | struct kfd_local_mem_info local_mem_info; | 1045 | struct kfd_local_mem_info local_mem_info; |
1050 | 1046 | ||
1051 | if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) | 1047 | if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) |
@@ -1106,12 +1102,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, | |||
1106 | /* Check if this node supports IOMMU. During parsing this flag will | 1102 | /* Check if this node supports IOMMU. During parsing this flag will |
1107 | * translate to HSA_CAP_ATS_PRESENT | 1103 | * translate to HSA_CAP_ATS_PRESENT |
1108 | */ | 1104 | */ |
1109 | iommu_info.flags = 0; | 1105 | if (!kfd_iommu_check_device(kdev)) |
1110 | if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) { | 1106 | cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; |
1111 | if ((iommu_info.flags & required_iommu_flags) == | ||
1112 | required_iommu_flags) | ||
1113 | cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; | ||
1114 | } | ||
1115 | 1107 | ||
1116 | crat_table->length += sub_type_hdr->length; | 1108 | crat_table->length += sub_type_hdr->length; |
1117 | crat_table->total_entries++; | 1109 | crat_table->total_entries++; |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c index 3da25f7bda6b..9d4af961c5d1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include "kfd_pm4_headers_diq.h" | 33 | #include "kfd_pm4_headers_diq.h" |
34 | #include "kfd_dbgmgr.h" | 34 | #include "kfd_dbgmgr.h" |
35 | #include "kfd_dbgdev.h" | 35 | #include "kfd_dbgdev.h" |
36 | #include "kfd_device_queue_manager.h" | ||
36 | 37 | ||
37 | static DEFINE_MUTEX(kfd_dbgmgr_mutex); | 38 | static DEFINE_MUTEX(kfd_dbgmgr_mutex); |
38 | 39 | ||
@@ -83,7 +84,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) | |||
83 | } | 84 | } |
84 | 85 | ||
85 | /* get actual type of DBGDevice cpsch or not */ | 86 | /* get actual type of DBGDevice cpsch or not */ |
86 | if (sched_policy == KFD_SCHED_POLICY_NO_HWS) | 87 | if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) |
87 | type = DBGDEV_TYPE_NODIQ; | 88 | type = DBGDEV_TYPE_NODIQ; |
88 | 89 | ||
89 | kfd_dbgdev_init(new_buff->dbgdev, pdev, type); | 90 | kfd_dbgdev_init(new_buff->dbgdev, pdev, type); |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index a8fa33a08de3..3346699960dd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c | |||
@@ -20,7 +20,9 @@ | |||
20 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | */ | 21 | */ |
22 | 22 | ||
23 | #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) | ||
23 | #include <linux/amd-iommu.h> | 24 | #include <linux/amd-iommu.h> |
25 | #endif | ||
24 | #include <linux/bsearch.h> | 26 | #include <linux/bsearch.h> |
25 | #include <linux/pci.h> | 27 | #include <linux/pci.h> |
26 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
@@ -28,9 +30,12 @@ | |||
28 | #include "kfd_device_queue_manager.h" | 30 | #include "kfd_device_queue_manager.h" |
29 | #include "kfd_pm4_headers_vi.h" | 31 | #include "kfd_pm4_headers_vi.h" |
30 | #include "cwsr_trap_handler_gfx8.asm" | 32 | #include "cwsr_trap_handler_gfx8.asm" |
33 | #include "kfd_iommu.h" | ||
31 | 34 | ||
32 | #define MQD_SIZE_ALIGNED 768 | 35 | #define MQD_SIZE_ALIGNED 768 |
36 | static atomic_t kfd_device_suspended = ATOMIC_INIT(0); | ||
33 | 37 | ||
38 | #ifdef KFD_SUPPORT_IOMMU_V2 | ||
34 | static const struct kfd_device_info kaveri_device_info = { | 39 | static const struct kfd_device_info kaveri_device_info = { |
35 | .asic_family = CHIP_KAVERI, | 40 | .asic_family = CHIP_KAVERI, |
36 | .max_pasid_bits = 16, | 41 | .max_pasid_bits = 16, |
@@ -41,6 +46,8 @@ static const struct kfd_device_info kaveri_device_info = { | |||
41 | .num_of_watch_points = 4, | 46 | .num_of_watch_points = 4, |
42 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | 47 | .mqd_size_aligned = MQD_SIZE_ALIGNED, |
43 | .supports_cwsr = false, | 48 | .supports_cwsr = false, |
49 | .needs_iommu_device = true, | ||
50 | .needs_pci_atomics = false, | ||
44 | }; | 51 | }; |
45 | 52 | ||
46 | static const struct kfd_device_info carrizo_device_info = { | 53 | static const struct kfd_device_info carrizo_device_info = { |
@@ -53,15 +60,125 @@ static const struct kfd_device_info carrizo_device_info = { | |||
53 | .num_of_watch_points = 4, | 60 | .num_of_watch_points = 4, |
54 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | 61 | .mqd_size_aligned = MQD_SIZE_ALIGNED, |
55 | .supports_cwsr = true, | 62 | .supports_cwsr = true, |
63 | .needs_iommu_device = true, | ||
64 | .needs_pci_atomics = false, | ||
56 | }; | 65 | }; |
66 | #endif | ||
67 | |||
68 | static const struct kfd_device_info hawaii_device_info = { | ||
69 | .asic_family = CHIP_HAWAII, | ||
70 | .max_pasid_bits = 16, | ||
71 | /* max num of queues for KV.TODO should be a dynamic value */ | ||
72 | .max_no_of_hqd = 24, | ||
73 | .ih_ring_entry_size = 4 * sizeof(uint32_t), | ||
74 | .event_interrupt_class = &event_interrupt_class_cik, | ||
75 | .num_of_watch_points = 4, | ||
76 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | ||
77 | .supports_cwsr = false, | ||
78 | .needs_iommu_device = false, | ||
79 | .needs_pci_atomics = false, | ||
80 | }; | ||
81 | |||
82 | static const struct kfd_device_info tonga_device_info = { | ||
83 | .asic_family = CHIP_TONGA, | ||
84 | .max_pasid_bits = 16, | ||
85 | .max_no_of_hqd = 24, | ||
86 | .ih_ring_entry_size = 4 * sizeof(uint32_t), | ||
87 | .event_interrupt_class = &event_interrupt_class_cik, | ||
88 | .num_of_watch_points = 4, | ||
89 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | ||
90 | .supports_cwsr = false, | ||
91 | .needs_iommu_device = false, | ||
92 | .needs_pci_atomics = true, | ||
93 | }; | ||
94 | |||
95 | static const struct kfd_device_info tonga_vf_device_info = { | ||
96 | .asic_family = CHIP_TONGA, | ||
97 | .max_pasid_bits = 16, | ||
98 | .max_no_of_hqd = 24, | ||
99 | .ih_ring_entry_size = 4 * sizeof(uint32_t), | ||
100 | .event_interrupt_class = &event_interrupt_class_cik, | ||
101 | .num_of_watch_points = 4, | ||
102 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | ||
103 | .supports_cwsr = false, | ||
104 | .needs_iommu_device = false, | ||
105 | .needs_pci_atomics = false, | ||
106 | }; | ||
107 | |||
108 | static const struct kfd_device_info fiji_device_info = { | ||
109 | .asic_family = CHIP_FIJI, | ||
110 | .max_pasid_bits = 16, | ||
111 | .max_no_of_hqd = 24, | ||
112 | .ih_ring_entry_size = 4 * sizeof(uint32_t), | ||
113 | .event_interrupt_class = &event_interrupt_class_cik, | ||
114 | .num_of_watch_points = 4, | ||
115 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | ||
116 | .supports_cwsr = true, | ||
117 | .needs_iommu_device = false, | ||
118 | .needs_pci_atomics = true, | ||
119 | }; | ||
120 | |||
121 | static const struct kfd_device_info fiji_vf_device_info = { | ||
122 | .asic_family = CHIP_FIJI, | ||
123 | .max_pasid_bits = 16, | ||
124 | .max_no_of_hqd = 24, | ||
125 | .ih_ring_entry_size = 4 * sizeof(uint32_t), | ||
126 | .event_interrupt_class = &event_interrupt_class_cik, | ||
127 | .num_of_watch_points = 4, | ||
128 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | ||
129 | .supports_cwsr = true, | ||
130 | .needs_iommu_device = false, | ||
131 | .needs_pci_atomics = false, | ||
132 | }; | ||
133 | |||
134 | |||
135 | static const struct kfd_device_info polaris10_device_info = { | ||
136 | .asic_family = CHIP_POLARIS10, | ||
137 | .max_pasid_bits = 16, | ||
138 | .max_no_of_hqd = 24, | ||
139 | .ih_ring_entry_size = 4 * sizeof(uint32_t), | ||
140 | .event_interrupt_class = &event_interrupt_class_cik, | ||
141 | .num_of_watch_points = 4, | ||
142 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | ||
143 | .supports_cwsr = true, | ||
144 | .needs_iommu_device = false, | ||
145 | .needs_pci_atomics = true, | ||
146 | }; | ||
147 | |||
148 | static const struct kfd_device_info polaris10_vf_device_info = { | ||
149 | .asic_family = CHIP_POLARIS10, | ||
150 | .max_pasid_bits = 16, | ||
151 | .max_no_of_hqd = 24, | ||
152 | .ih_ring_entry_size = 4 * sizeof(uint32_t), | ||
153 | .event_interrupt_class = &event_interrupt_class_cik, | ||
154 | .num_of_watch_points = 4, | ||
155 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | ||
156 | .supports_cwsr = true, | ||
157 | .needs_iommu_device = false, | ||
158 | .needs_pci_atomics = false, | ||
159 | }; | ||
160 | |||
161 | static const struct kfd_device_info polaris11_device_info = { | ||
162 | .asic_family = CHIP_POLARIS11, | ||
163 | .max_pasid_bits = 16, | ||
164 | .max_no_of_hqd = 24, | ||
165 | .ih_ring_entry_size = 4 * sizeof(uint32_t), | ||
166 | .event_interrupt_class = &event_interrupt_class_cik, | ||
167 | .num_of_watch_points = 4, | ||
168 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | ||
169 | .supports_cwsr = true, | ||
170 | .needs_iommu_device = false, | ||
171 | .needs_pci_atomics = true, | ||
172 | }; | ||
173 | |||
57 | 174 | ||
58 | struct kfd_deviceid { | 175 | struct kfd_deviceid { |
59 | unsigned short did; | 176 | unsigned short did; |
60 | const struct kfd_device_info *device_info; | 177 | const struct kfd_device_info *device_info; |
61 | }; | 178 | }; |
62 | 179 | ||
63 | /* Please keep this sorted by increasing device id. */ | ||
64 | static const struct kfd_deviceid supported_devices[] = { | 180 | static const struct kfd_deviceid supported_devices[] = { |
181 | #ifdef KFD_SUPPORT_IOMMU_V2 | ||
65 | { 0x1304, &kaveri_device_info }, /* Kaveri */ | 182 | { 0x1304, &kaveri_device_info }, /* Kaveri */ |
66 | { 0x1305, &kaveri_device_info }, /* Kaveri */ | 183 | { 0x1305, &kaveri_device_info }, /* Kaveri */ |
67 | { 0x1306, &kaveri_device_info }, /* Kaveri */ | 184 | { 0x1306, &kaveri_device_info }, /* Kaveri */ |
@@ -88,7 +205,51 @@ static const struct kfd_deviceid supported_devices[] = { | |||
88 | { 0x9874, &carrizo_device_info }, /* Carrizo */ | 205 | { 0x9874, &carrizo_device_info }, /* Carrizo */ |
89 | { 0x9875, &carrizo_device_info }, /* Carrizo */ | 206 | { 0x9875, &carrizo_device_info }, /* Carrizo */ |
90 | { 0x9876, &carrizo_device_info }, /* Carrizo */ | 207 | { 0x9876, &carrizo_device_info }, /* Carrizo */ |
91 | { 0x9877, &carrizo_device_info } /* Carrizo */ | 208 | { 0x9877, &carrizo_device_info }, /* Carrizo */ |
209 | #endif | ||
210 | { 0x67A0, &hawaii_device_info }, /* Hawaii */ | ||
211 | { 0x67A1, &hawaii_device_info }, /* Hawaii */ | ||
212 | { 0x67A2, &hawaii_device_info }, /* Hawaii */ | ||
213 | { 0x67A8, &hawaii_device_info }, /* Hawaii */ | ||
214 | { 0x67A9, &hawaii_device_info }, /* Hawaii */ | ||
215 | { 0x67AA, &hawaii_device_info }, /* Hawaii */ | ||
216 | { 0x67B0, &hawaii_device_info }, /* Hawaii */ | ||
217 | { 0x67B1, &hawaii_device_info }, /* Hawaii */ | ||
218 | { 0x67B8, &hawaii_device_info }, /* Hawaii */ | ||
219 | { 0x67B9, &hawaii_device_info }, /* Hawaii */ | ||
220 | { 0x67BA, &hawaii_device_info }, /* Hawaii */ | ||
221 | { 0x67BE, &hawaii_device_info }, /* Hawaii */ | ||
222 | { 0x6920, &tonga_device_info }, /* Tonga */ | ||
223 | { 0x6921, &tonga_device_info }, /* Tonga */ | ||
224 | { 0x6928, &tonga_device_info }, /* Tonga */ | ||
225 | { 0x6929, &tonga_device_info }, /* Tonga */ | ||
226 | { 0x692B, &tonga_device_info }, /* Tonga */ | ||
227 | { 0x692F, &tonga_vf_device_info }, /* Tonga vf */ | ||
228 | { 0x6938, &tonga_device_info }, /* Tonga */ | ||
229 | { 0x6939, &tonga_device_info }, /* Tonga */ | ||
230 | { 0x7300, &fiji_device_info }, /* Fiji */ | ||
231 | { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/ | ||
232 | { 0x67C0, &polaris10_device_info }, /* Polaris10 */ | ||
233 | { 0x67C1, &polaris10_device_info }, /* Polaris10 */ | ||
234 | { 0x67C2, &polaris10_device_info }, /* Polaris10 */ | ||
235 | { 0x67C4, &polaris10_device_info }, /* Polaris10 */ | ||
236 | { 0x67C7, &polaris10_device_info }, /* Polaris10 */ | ||
237 | { 0x67C8, &polaris10_device_info }, /* Polaris10 */ | ||
238 | { 0x67C9, &polaris10_device_info }, /* Polaris10 */ | ||
239 | { 0x67CA, &polaris10_device_info }, /* Polaris10 */ | ||
240 | { 0x67CC, &polaris10_device_info }, /* Polaris10 */ | ||
241 | { 0x67CF, &polaris10_device_info }, /* Polaris10 */ | ||
242 | { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ | ||
243 | { 0x67DF, &polaris10_device_info }, /* Polaris10 */ | ||
244 | { 0x67E0, &polaris11_device_info }, /* Polaris11 */ | ||
245 | { 0x67E1, &polaris11_device_info }, /* Polaris11 */ | ||
246 | { 0x67E3, &polaris11_device_info }, /* Polaris11 */ | ||
247 | { 0x67E7, &polaris11_device_info }, /* Polaris11 */ | ||
248 | { 0x67E8, &polaris11_device_info }, /* Polaris11 */ | ||
249 | { 0x67E9, &polaris11_device_info }, /* Polaris11 */ | ||
250 | { 0x67EB, &polaris11_device_info }, /* Polaris11 */ | ||
251 | { 0x67EF, &polaris11_device_info }, /* Polaris11 */ | ||
252 | { 0x67FF, &polaris11_device_info }, /* Polaris11 */ | ||
92 | }; | 253 | }; |
93 | 254 | ||
94 | static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, | 255 | static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, |
@@ -127,6 +288,21 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, | |||
127 | return NULL; | 288 | return NULL; |
128 | } | 289 | } |
129 | 290 | ||
291 | if (device_info->needs_pci_atomics) { | ||
292 | /* Allow BIF to recode atomics to PCIe 3.0 | ||
293 | * AtomicOps. 32 and 64-bit requests are possible and | ||
294 | * must be supported. | ||
295 | */ | ||
296 | if (pci_enable_atomic_ops_to_root(pdev, | ||
297 | PCI_EXP_DEVCAP2_ATOMIC_COMP32 | | ||
298 | PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) { | ||
299 | dev_info(kfd_device, | ||
300 | "skipped device %x:%x, PCI rejects atomics", | ||
301 | pdev->vendor, pdev->device); | ||
302 | return NULL; | ||
303 | } | ||
304 | } | ||
305 | |||
130 | kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); | 306 | kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); |
131 | if (!kfd) | 307 | if (!kfd) |
132 | return NULL; | 308 | return NULL; |
@@ -144,77 +320,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, | |||
144 | return kfd; | 320 | return kfd; |
145 | } | 321 | } |
146 | 322 | ||
147 | static bool device_iommu_pasid_init(struct kfd_dev *kfd) | ||
148 | { | ||
149 | const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | | ||
150 | AMD_IOMMU_DEVICE_FLAG_PRI_SUP | | ||
151 | AMD_IOMMU_DEVICE_FLAG_PASID_SUP; | ||
152 | |||
153 | struct amd_iommu_device_info iommu_info; | ||
154 | unsigned int pasid_limit; | ||
155 | int err; | ||
156 | |||
157 | err = amd_iommu_device_info(kfd->pdev, &iommu_info); | ||
158 | if (err < 0) { | ||
159 | dev_err(kfd_device, | ||
160 | "error getting iommu info. is the iommu enabled?\n"); | ||
161 | return false; | ||
162 | } | ||
163 | |||
164 | if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { | ||
165 | dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n", | ||
166 | (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, | ||
167 | (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, | ||
168 | (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) | ||
169 | != 0); | ||
170 | return false; | ||
171 | } | ||
172 | |||
173 | pasid_limit = min_t(unsigned int, | ||
174 | (unsigned int)(1 << kfd->device_info->max_pasid_bits), | ||
175 | iommu_info.max_pasids); | ||
176 | |||
177 | if (!kfd_set_pasid_limit(pasid_limit)) { | ||
178 | dev_err(kfd_device, "error setting pasid limit\n"); | ||
179 | return false; | ||
180 | } | ||
181 | |||
182 | return true; | ||
183 | } | ||
184 | |||
185 | static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) | ||
186 | { | ||
187 | struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); | ||
188 | |||
189 | if (dev) | ||
190 | kfd_process_iommu_unbind_callback(dev, pasid); | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * This function called by IOMMU driver on PPR failure | ||
195 | */ | ||
196 | static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, | ||
197 | unsigned long address, u16 flags) | ||
198 | { | ||
199 | struct kfd_dev *dev; | ||
200 | |||
201 | dev_warn(kfd_device, | ||
202 | "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", | ||
203 | PCI_BUS_NUM(pdev->devfn), | ||
204 | PCI_SLOT(pdev->devfn), | ||
205 | PCI_FUNC(pdev->devfn), | ||
206 | pasid, | ||
207 | address, | ||
208 | flags); | ||
209 | |||
210 | dev = kfd_device_by_pci_dev(pdev); | ||
211 | if (!WARN_ON(!dev)) | ||
212 | kfd_signal_iommu_event(dev, pasid, address, | ||
213 | flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); | ||
214 | |||
215 | return AMD_IOMMU_INV_PRI_RSP_INVALID; | ||
216 | } | ||
217 | |||
218 | static void kfd_cwsr_init(struct kfd_dev *kfd) | 323 | static void kfd_cwsr_init(struct kfd_dev *kfd) |
219 | { | 324 | { |
220 | if (cwsr_enable && kfd->device_info->supports_cwsr) { | 325 | if (cwsr_enable && kfd->device_info->supports_cwsr) { |
@@ -304,11 +409,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, | |||
304 | goto device_queue_manager_error; | 409 | goto device_queue_manager_error; |
305 | } | 410 | } |
306 | 411 | ||
307 | if (!device_iommu_pasid_init(kfd)) { | 412 | if (kfd_iommu_device_init(kfd)) { |
308 | dev_err(kfd_device, | 413 | dev_err(kfd_device, "Error initializing iommuv2\n"); |
309 | "Error initializing iommuv2 for device %x:%x\n", | 414 | goto device_iommu_error; |
310 | kfd->pdev->vendor, kfd->pdev->device); | ||
311 | goto device_iommu_pasid_error; | ||
312 | } | 415 | } |
313 | 416 | ||
314 | kfd_cwsr_init(kfd); | 417 | kfd_cwsr_init(kfd); |
@@ -323,12 +426,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, | |||
323 | kfd->pdev->device); | 426 | kfd->pdev->device); |
324 | 427 | ||
325 | pr_debug("Starting kfd with the following scheduling policy %d\n", | 428 | pr_debug("Starting kfd with the following scheduling policy %d\n", |
326 | sched_policy); | 429 | kfd->dqm->sched_policy); |
327 | 430 | ||
328 | goto out; | 431 | goto out; |
329 | 432 | ||
330 | kfd_resume_error: | 433 | kfd_resume_error: |
331 | device_iommu_pasid_error: | 434 | device_iommu_error: |
332 | device_queue_manager_uninit(kfd->dqm); | 435 | device_queue_manager_uninit(kfd->dqm); |
333 | device_queue_manager_error: | 436 | device_queue_manager_error: |
334 | kfd_interrupt_exit(kfd); | 437 | kfd_interrupt_exit(kfd); |
@@ -367,40 +470,45 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) | |||
367 | if (!kfd->init_complete) | 470 | if (!kfd->init_complete) |
368 | return; | 471 | return; |
369 | 472 | ||
370 | kfd->dqm->ops.stop(kfd->dqm); | 473 | /* For first KFD device suspend all the KFD processes */ |
474 | if (atomic_inc_return(&kfd_device_suspended) == 1) | ||
475 | kfd_suspend_all_processes(); | ||
371 | 476 | ||
372 | kfd_unbind_processes_from_device(kfd); | 477 | kfd->dqm->ops.stop(kfd->dqm); |
373 | 478 | ||
374 | amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); | 479 | kfd_iommu_suspend(kfd); |
375 | amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); | ||
376 | amd_iommu_free_device(kfd->pdev); | ||
377 | } | 480 | } |
378 | 481 | ||
379 | int kgd2kfd_resume(struct kfd_dev *kfd) | 482 | int kgd2kfd_resume(struct kfd_dev *kfd) |
380 | { | 483 | { |
484 | int ret, count; | ||
485 | |||
381 | if (!kfd->init_complete) | 486 | if (!kfd->init_complete) |
382 | return 0; | 487 | return 0; |
383 | 488 | ||
384 | return kfd_resume(kfd); | 489 | ret = kfd_resume(kfd); |
490 | if (ret) | ||
491 | return ret; | ||
492 | |||
493 | count = atomic_dec_return(&kfd_device_suspended); | ||
494 | WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); | ||
495 | if (count == 0) | ||
496 | ret = kfd_resume_all_processes(); | ||
385 | 497 | ||
498 | return ret; | ||
386 | } | 499 | } |
387 | 500 | ||
388 | static int kfd_resume(struct kfd_dev *kfd) | 501 | static int kfd_resume(struct kfd_dev *kfd) |
389 | { | 502 | { |
390 | int err = 0; | 503 | int err = 0; |
391 | unsigned int pasid_limit = kfd_get_pasid_limit(); | ||
392 | 504 | ||
393 | err = amd_iommu_init_device(kfd->pdev, pasid_limit); | 505 | err = kfd_iommu_resume(kfd); |
394 | if (err) | 506 | if (err) { |
395 | return -ENXIO; | 507 | dev_err(kfd_device, |
396 | amd_iommu_set_invalidate_ctx_cb(kfd->pdev, | 508 | "Failed to resume IOMMU for device %x:%x\n", |
397 | iommu_pasid_shutdown_callback); | 509 | kfd->pdev->vendor, kfd->pdev->device); |
398 | amd_iommu_set_invalid_ppr_cb(kfd->pdev, | 510 | return err; |
399 | iommu_invalid_ppr_cb); | 511 | } |
400 | |||
401 | err = kfd_bind_processes_to_device(kfd); | ||
402 | if (err) | ||
403 | goto processes_bind_error; | ||
404 | 512 | ||
405 | err = kfd->dqm->ops.start(kfd->dqm); | 513 | err = kfd->dqm->ops.start(kfd->dqm); |
406 | if (err) { | 514 | if (err) { |
@@ -413,9 +521,7 @@ static int kfd_resume(struct kfd_dev *kfd) | |||
413 | return err; | 521 | return err; |
414 | 522 | ||
415 | dqm_start_error: | 523 | dqm_start_error: |
416 | processes_bind_error: | 524 | kfd_iommu_suspend(kfd); |
417 | amd_iommu_free_device(kfd->pdev); | ||
418 | |||
419 | return err; | 525 | return err; |
420 | } | 526 | } |
421 | 527 | ||
@@ -435,6 +541,54 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) | |||
435 | spin_unlock(&kfd->interrupt_lock); | 541 | spin_unlock(&kfd->interrupt_lock); |
436 | } | 542 | } |
437 | 543 | ||
544 | /** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will | ||
545 | * prepare for safe eviction of KFD BOs that belong to the specified | ||
546 | * process. | ||
547 | * | ||
548 | * @mm: mm_struct that identifies the specified KFD process | ||
549 | * @fence: eviction fence attached to KFD process BOs | ||
550 | * | ||
551 | */ | ||
552 | int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, | ||
553 | struct dma_fence *fence) | ||
554 | { | ||
555 | struct kfd_process *p; | ||
556 | unsigned long active_time; | ||
557 | unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS); | ||
558 | |||
559 | if (!fence) | ||
560 | return -EINVAL; | ||
561 | |||
562 | if (dma_fence_is_signaled(fence)) | ||
563 | return 0; | ||
564 | |||
565 | p = kfd_lookup_process_by_mm(mm); | ||
566 | if (!p) | ||
567 | return -ENODEV; | ||
568 | |||
569 | if (fence->seqno == p->last_eviction_seqno) | ||
570 | goto out; | ||
571 | |||
572 | p->last_eviction_seqno = fence->seqno; | ||
573 | |||
574 | /* Avoid KFD process starvation. Wait for at least | ||
575 | * PROCESS_ACTIVE_TIME_MS before evicting the process again | ||
576 | */ | ||
577 | active_time = get_jiffies_64() - p->last_restore_timestamp; | ||
578 | if (delay_jiffies > active_time) | ||
579 | delay_jiffies -= active_time; | ||
580 | else | ||
581 | delay_jiffies = 0; | ||
582 | |||
583 | /* During process initialization eviction_work.dwork is initialized | ||
584 | * to kfd_evict_bo_worker | ||
585 | */ | ||
586 | schedule_delayed_work(&p->eviction_work, delay_jiffies); | ||
587 | out: | ||
588 | kfd_unref_process(p); | ||
589 | return 0; | ||
590 | } | ||
591 | |||
438 | static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, | 592 | static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, |
439 | unsigned int chunk_size) | 593 | unsigned int chunk_size) |
440 | { | 594 | { |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index b21285afa4ea..b3b6dab71638 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | |||
@@ -21,10 +21,11 @@ | |||
21 | * | 21 | * |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/ratelimit.h> | ||
25 | #include <linux/printk.h> | ||
24 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
25 | #include <linux/list.h> | 27 | #include <linux/list.h> |
26 | #include <linux/types.h> | 28 | #include <linux/types.h> |
27 | #include <linux/printk.h> | ||
28 | #include <linux/bitops.h> | 29 | #include <linux/bitops.h> |
29 | #include <linux/sched.h> | 30 | #include <linux/sched.h> |
30 | #include "kfd_priv.h" | 31 | #include "kfd_priv.h" |
@@ -118,9 +119,8 @@ static int allocate_vmid(struct device_queue_manager *dqm, | |||
118 | if (dqm->vmid_bitmap == 0) | 119 | if (dqm->vmid_bitmap == 0) |
119 | return -ENOMEM; | 120 | return -ENOMEM; |
120 | 121 | ||
121 | bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, | 122 | bit = ffs(dqm->vmid_bitmap) - 1; |
122 | dqm->dev->vm_info.vmid_num_kfd); | 123 | dqm->vmid_bitmap &= ~(1 << bit); |
123 | clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap); | ||
124 | 124 | ||
125 | allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd; | 125 | allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd; |
126 | pr_debug("vmid allocation %d\n", allocated_vmid); | 126 | pr_debug("vmid allocation %d\n", allocated_vmid); |
@@ -130,6 +130,15 @@ static int allocate_vmid(struct device_queue_manager *dqm, | |||
130 | set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid); | 130 | set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid); |
131 | program_sh_mem_settings(dqm, qpd); | 131 | program_sh_mem_settings(dqm, qpd); |
132 | 132 | ||
133 | /* qpd->page_table_base is set earlier when register_process() | ||
134 | * is called, i.e. when the first queue is created. | ||
135 | */ | ||
136 | dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd, | ||
137 | qpd->vmid, | ||
138 | qpd->page_table_base); | ||
139 | /* invalidate the VM context after pasid and vmid mapping is set up */ | ||
140 | kfd_flush_tlb(qpd_to_pdd(qpd)); | ||
141 | |||
133 | return 0; | 142 | return 0; |
134 | } | 143 | } |
135 | 144 | ||
@@ -139,10 +148,12 @@ static void deallocate_vmid(struct device_queue_manager *dqm, | |||
139 | { | 148 | { |
140 | int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; | 149 | int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; |
141 | 150 | ||
151 | kfd_flush_tlb(qpd_to_pdd(qpd)); | ||
152 | |||
142 | /* Release the vmid mapping */ | 153 | /* Release the vmid mapping */ |
143 | set_pasid_vmid_mapping(dqm, 0, qpd->vmid); | 154 | set_pasid_vmid_mapping(dqm, 0, qpd->vmid); |
144 | 155 | ||
145 | set_bit(bit, (unsigned long *)&dqm->vmid_bitmap); | 156 | dqm->vmid_bitmap |= (1 << bit); |
146 | qpd->vmid = 0; | 157 | qpd->vmid = 0; |
147 | q->properties.vmid = 0; | 158 | q->properties.vmid = 0; |
148 | } | 159 | } |
@@ -170,6 +181,14 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, | |||
170 | goto out_unlock; | 181 | goto out_unlock; |
171 | } | 182 | } |
172 | q->properties.vmid = qpd->vmid; | 183 | q->properties.vmid = qpd->vmid; |
184 | /* | ||
185 | * Eviction state logic: we only mark active queues as evicted | ||
186 | * to avoid the overhead of restoring inactive queues later | ||
187 | */ | ||
188 | if (qpd->evicted) | ||
189 | q->properties.is_evicted = (q->properties.queue_size > 0 && | ||
190 | q->properties.queue_percent > 0 && | ||
191 | q->properties.queue_address != 0); | ||
173 | 192 | ||
174 | q->properties.tba_addr = qpd->tba_addr; | 193 | q->properties.tba_addr = qpd->tba_addr; |
175 | q->properties.tma_addr = qpd->tma_addr; | 194 | q->properties.tma_addr = qpd->tma_addr; |
@@ -223,12 +242,8 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) | |||
223 | continue; | 242 | continue; |
224 | 243 | ||
225 | if (dqm->allocated_queues[pipe] != 0) { | 244 | if (dqm->allocated_queues[pipe] != 0) { |
226 | bit = find_first_bit( | 245 | bit = ffs(dqm->allocated_queues[pipe]) - 1; |
227 | (unsigned long *)&dqm->allocated_queues[pipe], | 246 | dqm->allocated_queues[pipe] &= ~(1 << bit); |
228 | get_queues_per_pipe(dqm)); | ||
229 | |||
230 | clear_bit(bit, | ||
231 | (unsigned long *)&dqm->allocated_queues[pipe]); | ||
232 | q->pipe = pipe; | 247 | q->pipe = pipe; |
233 | q->queue = bit; | 248 | q->queue = bit; |
234 | set = true; | 249 | set = true; |
@@ -249,7 +264,7 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) | |||
249 | static inline void deallocate_hqd(struct device_queue_manager *dqm, | 264 | static inline void deallocate_hqd(struct device_queue_manager *dqm, |
250 | struct queue *q) | 265 | struct queue *q) |
251 | { | 266 | { |
252 | set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]); | 267 | dqm->allocated_queues[q->pipe] |= (1 << q->queue); |
253 | } | 268 | } |
254 | 269 | ||
255 | static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, | 270 | static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, |
@@ -371,21 +386,35 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) | |||
371 | { | 386 | { |
372 | int retval; | 387 | int retval; |
373 | struct mqd_manager *mqd; | 388 | struct mqd_manager *mqd; |
389 | struct kfd_process_device *pdd; | ||
374 | bool prev_active = false; | 390 | bool prev_active = false; |
375 | 391 | ||
376 | mutex_lock(&dqm->lock); | 392 | mutex_lock(&dqm->lock); |
393 | pdd = kfd_get_process_device_data(q->device, q->process); | ||
394 | if (!pdd) { | ||
395 | retval = -ENODEV; | ||
396 | goto out_unlock; | ||
397 | } | ||
377 | mqd = dqm->ops.get_mqd_manager(dqm, | 398 | mqd = dqm->ops.get_mqd_manager(dqm, |
378 | get_mqd_type_from_queue_type(q->properties.type)); | 399 | get_mqd_type_from_queue_type(q->properties.type)); |
379 | if (!mqd) { | 400 | if (!mqd) { |
380 | retval = -ENOMEM; | 401 | retval = -ENOMEM; |
381 | goto out_unlock; | 402 | goto out_unlock; |
382 | } | 403 | } |
404 | /* | ||
405 | * Eviction state logic: we only mark active queues as evicted | ||
406 | * to avoid the overhead of restoring inactive queues later | ||
407 | */ | ||
408 | if (pdd->qpd.evicted) | ||
409 | q->properties.is_evicted = (q->properties.queue_size > 0 && | ||
410 | q->properties.queue_percent > 0 && | ||
411 | q->properties.queue_address != 0); | ||
383 | 412 | ||
384 | /* Save previous activity state for counters */ | 413 | /* Save previous activity state for counters */ |
385 | prev_active = q->properties.is_active; | 414 | prev_active = q->properties.is_active; |
386 | 415 | ||
387 | /* Make sure the queue is unmapped before updating the MQD */ | 416 | /* Make sure the queue is unmapped before updating the MQD */ |
388 | if (sched_policy != KFD_SCHED_POLICY_NO_HWS) { | 417 | if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { |
389 | retval = unmap_queues_cpsch(dqm, | 418 | retval = unmap_queues_cpsch(dqm, |
390 | KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); | 419 | KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); |
391 | if (retval) { | 420 | if (retval) { |
@@ -417,7 +446,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) | |||
417 | else if (!q->properties.is_active && prev_active) | 446 | else if (!q->properties.is_active && prev_active) |
418 | dqm->queue_count--; | 447 | dqm->queue_count--; |
419 | 448 | ||
420 | if (sched_policy != KFD_SCHED_POLICY_NO_HWS) | 449 | if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) |
421 | retval = map_queues_cpsch(dqm); | 450 | retval = map_queues_cpsch(dqm); |
422 | else if (q->properties.is_active && | 451 | else if (q->properties.is_active && |
423 | (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || | 452 | (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || |
@@ -451,10 +480,193 @@ static struct mqd_manager *get_mqd_manager( | |||
451 | return mqd; | 480 | return mqd; |
452 | } | 481 | } |
453 | 482 | ||
483 | static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, | ||
484 | struct qcm_process_device *qpd) | ||
485 | { | ||
486 | struct queue *q; | ||
487 | struct mqd_manager *mqd; | ||
488 | struct kfd_process_device *pdd; | ||
489 | int retval = 0; | ||
490 | |||
491 | mutex_lock(&dqm->lock); | ||
492 | if (qpd->evicted++ > 0) /* already evicted, do nothing */ | ||
493 | goto out; | ||
494 | |||
495 | pdd = qpd_to_pdd(qpd); | ||
496 | pr_info_ratelimited("Evicting PASID %u queues\n", | ||
497 | pdd->process->pasid); | ||
498 | |||
499 | /* unactivate all active queues on the qpd */ | ||
500 | list_for_each_entry(q, &qpd->queues_list, list) { | ||
501 | if (!q->properties.is_active) | ||
502 | continue; | ||
503 | mqd = dqm->ops.get_mqd_manager(dqm, | ||
504 | get_mqd_type_from_queue_type(q->properties.type)); | ||
505 | if (!mqd) { /* should not be here */ | ||
506 | pr_err("Cannot evict queue, mqd mgr is NULL\n"); | ||
507 | retval = -ENOMEM; | ||
508 | goto out; | ||
509 | } | ||
510 | q->properties.is_evicted = true; | ||
511 | q->properties.is_active = false; | ||
512 | retval = mqd->destroy_mqd(mqd, q->mqd, | ||
513 | KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, | ||
514 | KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); | ||
515 | if (retval) | ||
516 | goto out; | ||
517 | dqm->queue_count--; | ||
518 | } | ||
519 | |||
520 | out: | ||
521 | mutex_unlock(&dqm->lock); | ||
522 | return retval; | ||
523 | } | ||
524 | |||
525 | static int evict_process_queues_cpsch(struct device_queue_manager *dqm, | ||
526 | struct qcm_process_device *qpd) | ||
527 | { | ||
528 | struct queue *q; | ||
529 | struct kfd_process_device *pdd; | ||
530 | int retval = 0; | ||
531 | |||
532 | mutex_lock(&dqm->lock); | ||
533 | if (qpd->evicted++ > 0) /* already evicted, do nothing */ | ||
534 | goto out; | ||
535 | |||
536 | pdd = qpd_to_pdd(qpd); | ||
537 | pr_info_ratelimited("Evicting PASID %u queues\n", | ||
538 | pdd->process->pasid); | ||
539 | |||
540 | /* unactivate all active queues on the qpd */ | ||
541 | list_for_each_entry(q, &qpd->queues_list, list) { | ||
542 | if (!q->properties.is_active) | ||
543 | continue; | ||
544 | q->properties.is_evicted = true; | ||
545 | q->properties.is_active = false; | ||
546 | dqm->queue_count--; | ||
547 | } | ||
548 | retval = execute_queues_cpsch(dqm, | ||
549 | qpd->is_debug ? | ||
550 | KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : | ||
551 | KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); | ||
552 | |||
553 | out: | ||
554 | mutex_unlock(&dqm->lock); | ||
555 | return retval; | ||
556 | } | ||
557 | |||
558 | static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, | ||
559 | struct qcm_process_device *qpd) | ||
560 | { | ||
561 | struct queue *q; | ||
562 | struct mqd_manager *mqd; | ||
563 | struct kfd_process_device *pdd; | ||
564 | uint32_t pd_base; | ||
565 | int retval = 0; | ||
566 | |||
567 | pdd = qpd_to_pdd(qpd); | ||
568 | /* Retrieve PD base */ | ||
569 | pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); | ||
570 | |||
571 | mutex_lock(&dqm->lock); | ||
572 | if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ | ||
573 | goto out; | ||
574 | if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ | ||
575 | qpd->evicted--; | ||
576 | goto out; | ||
577 | } | ||
578 | |||
579 | pr_info_ratelimited("Restoring PASID %u queues\n", | ||
580 | pdd->process->pasid); | ||
581 | |||
582 | /* Update PD Base in QPD */ | ||
583 | qpd->page_table_base = pd_base; | ||
584 | pr_debug("Updated PD address to 0x%08x\n", pd_base); | ||
585 | |||
586 | if (!list_empty(&qpd->queues_list)) { | ||
587 | dqm->dev->kfd2kgd->set_vm_context_page_table_base( | ||
588 | dqm->dev->kgd, | ||
589 | qpd->vmid, | ||
590 | qpd->page_table_base); | ||
591 | kfd_flush_tlb(pdd); | ||
592 | } | ||
593 | |||
594 | /* activate all active queues on the qpd */ | ||
595 | list_for_each_entry(q, &qpd->queues_list, list) { | ||
596 | if (!q->properties.is_evicted) | ||
597 | continue; | ||
598 | mqd = dqm->ops.get_mqd_manager(dqm, | ||
599 | get_mqd_type_from_queue_type(q->properties.type)); | ||
600 | if (!mqd) { /* should not be here */ | ||
601 | pr_err("Cannot restore queue, mqd mgr is NULL\n"); | ||
602 | retval = -ENOMEM; | ||
603 | goto out; | ||
604 | } | ||
605 | q->properties.is_evicted = false; | ||
606 | q->properties.is_active = true; | ||
607 | retval = mqd->load_mqd(mqd, q->mqd, q->pipe, | ||
608 | q->queue, &q->properties, | ||
609 | q->process->mm); | ||
610 | if (retval) | ||
611 | goto out; | ||
612 | dqm->queue_count++; | ||
613 | } | ||
614 | qpd->evicted = 0; | ||
615 | out: | ||
616 | mutex_unlock(&dqm->lock); | ||
617 | return retval; | ||
618 | } | ||
619 | |||
620 | static int restore_process_queues_cpsch(struct device_queue_manager *dqm, | ||
621 | struct qcm_process_device *qpd) | ||
622 | { | ||
623 | struct queue *q; | ||
624 | struct kfd_process_device *pdd; | ||
625 | uint32_t pd_base; | ||
626 | int retval = 0; | ||
627 | |||
628 | pdd = qpd_to_pdd(qpd); | ||
629 | /* Retrieve PD base */ | ||
630 | pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); | ||
631 | |||
632 | mutex_lock(&dqm->lock); | ||
633 | if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ | ||
634 | goto out; | ||
635 | if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ | ||
636 | qpd->evicted--; | ||
637 | goto out; | ||
638 | } | ||
639 | |||
640 | pr_info_ratelimited("Restoring PASID %u queues\n", | ||
641 | pdd->process->pasid); | ||
642 | |||
643 | /* Update PD Base in QPD */ | ||
644 | qpd->page_table_base = pd_base; | ||
645 | pr_debug("Updated PD address to 0x%08x\n", pd_base); | ||
646 | |||
647 | /* activate all active queues on the qpd */ | ||
648 | list_for_each_entry(q, &qpd->queues_list, list) { | ||
649 | if (!q->properties.is_evicted) | ||
650 | continue; | ||
651 | q->properties.is_evicted = false; | ||
652 | q->properties.is_active = true; | ||
653 | dqm->queue_count++; | ||
654 | } | ||
655 | retval = execute_queues_cpsch(dqm, | ||
656 | KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); | ||
657 | if (!retval) | ||
658 | qpd->evicted = 0; | ||
659 | out: | ||
660 | mutex_unlock(&dqm->lock); | ||
661 | return retval; | ||
662 | } | ||
663 | |||
454 | static int register_process(struct device_queue_manager *dqm, | 664 | static int register_process(struct device_queue_manager *dqm, |
455 | struct qcm_process_device *qpd) | 665 | struct qcm_process_device *qpd) |
456 | { | 666 | { |
457 | struct device_process_node *n; | 667 | struct device_process_node *n; |
668 | struct kfd_process_device *pdd; | ||
669 | uint32_t pd_base; | ||
458 | int retval; | 670 | int retval; |
459 | 671 | ||
460 | n = kzalloc(sizeof(*n), GFP_KERNEL); | 672 | n = kzalloc(sizeof(*n), GFP_KERNEL); |
@@ -463,9 +675,16 @@ static int register_process(struct device_queue_manager *dqm, | |||
463 | 675 | ||
464 | n->qpd = qpd; | 676 | n->qpd = qpd; |
465 | 677 | ||
678 | pdd = qpd_to_pdd(qpd); | ||
679 | /* Retrieve PD base */ | ||
680 | pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); | ||
681 | |||
466 | mutex_lock(&dqm->lock); | 682 | mutex_lock(&dqm->lock); |
467 | list_add(&n->list, &dqm->queues); | 683 | list_add(&n->list, &dqm->queues); |
468 | 684 | ||
685 | /* Update PD Base in QPD */ | ||
686 | qpd->page_table_base = pd_base; | ||
687 | |||
469 | retval = dqm->asic_ops.update_qpd(dqm, qpd); | 688 | retval = dqm->asic_ops.update_qpd(dqm, qpd); |
470 | 689 | ||
471 | dqm->processes_count++; | 690 | dqm->processes_count++; |
@@ -589,10 +808,8 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, | |||
589 | if (dqm->sdma_bitmap == 0) | 808 | if (dqm->sdma_bitmap == 0) |
590 | return -ENOMEM; | 809 | return -ENOMEM; |
591 | 810 | ||
592 | bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap, | 811 | bit = ffs(dqm->sdma_bitmap) - 1; |
593 | CIK_SDMA_QUEUES); | 812 | dqm->sdma_bitmap &= ~(1 << bit); |
594 | |||
595 | clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap); | ||
596 | *sdma_queue_id = bit; | 813 | *sdma_queue_id = bit; |
597 | 814 | ||
598 | return 0; | 815 | return 0; |
@@ -603,7 +820,7 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm, | |||
603 | { | 820 | { |
604 | if (sdma_queue_id >= CIK_SDMA_QUEUES) | 821 | if (sdma_queue_id >= CIK_SDMA_QUEUES) |
605 | return; | 822 | return; |
606 | set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap); | 823 | dqm->sdma_bitmap |= (1 << sdma_queue_id); |
607 | } | 824 | } |
608 | 825 | ||
609 | static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, | 826 | static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, |
@@ -840,6 +1057,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, | |||
840 | retval = -ENOMEM; | 1057 | retval = -ENOMEM; |
841 | goto out; | 1058 | goto out; |
842 | } | 1059 | } |
1060 | /* | ||
1061 | * Eviction state logic: we only mark active queues as evicted | ||
1062 | * to avoid the overhead of restoring inactive queues later | ||
1063 | */ | ||
1064 | if (qpd->evicted) | ||
1065 | q->properties.is_evicted = (q->properties.queue_size > 0 && | ||
1066 | q->properties.queue_percent > 0 && | ||
1067 | q->properties.queue_address != 0); | ||
843 | 1068 | ||
844 | dqm->asic_ops.init_sdma_vm(dqm, q, qpd); | 1069 | dqm->asic_ops.init_sdma_vm(dqm, q, qpd); |
845 | 1070 | ||
@@ -1097,7 +1322,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, | |||
1097 | alternate_aperture_base, | 1322 | alternate_aperture_base, |
1098 | alternate_aperture_size); | 1323 | alternate_aperture_size); |
1099 | 1324 | ||
1100 | if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) | 1325 | if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) |
1101 | program_sh_mem_settings(dqm, qpd); | 1326 | program_sh_mem_settings(dqm, qpd); |
1102 | 1327 | ||
1103 | pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", | 1328 | pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", |
@@ -1242,8 +1467,24 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) | |||
1242 | if (!dqm) | 1467 | if (!dqm) |
1243 | return NULL; | 1468 | return NULL; |
1244 | 1469 | ||
1470 | switch (dev->device_info->asic_family) { | ||
1471 | /* HWS is not available on Hawaii. */ | ||
1472 | case CHIP_HAWAII: | ||
1473 | /* HWS depends on CWSR for timely dequeue. CWSR is not | ||
1474 | * available on Tonga. | ||
1475 | * | ||
1476 | * FIXME: This argument also applies to Kaveri. | ||
1477 | */ | ||
1478 | case CHIP_TONGA: | ||
1479 | dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; | ||
1480 | break; | ||
1481 | default: | ||
1482 | dqm->sched_policy = sched_policy; | ||
1483 | break; | ||
1484 | } | ||
1485 | |||
1245 | dqm->dev = dev; | 1486 | dqm->dev = dev; |
1246 | switch (sched_policy) { | 1487 | switch (dqm->sched_policy) { |
1247 | case KFD_SCHED_POLICY_HWS: | 1488 | case KFD_SCHED_POLICY_HWS: |
1248 | case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: | 1489 | case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: |
1249 | /* initialize dqm for cp scheduling */ | 1490 | /* initialize dqm for cp scheduling */ |
@@ -1262,6 +1503,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) | |||
1262 | dqm->ops.set_cache_memory_policy = set_cache_memory_policy; | 1503 | dqm->ops.set_cache_memory_policy = set_cache_memory_policy; |
1263 | dqm->ops.set_trap_handler = set_trap_handler; | 1504 | dqm->ops.set_trap_handler = set_trap_handler; |
1264 | dqm->ops.process_termination = process_termination_cpsch; | 1505 | dqm->ops.process_termination = process_termination_cpsch; |
1506 | dqm->ops.evict_process_queues = evict_process_queues_cpsch; | ||
1507 | dqm->ops.restore_process_queues = restore_process_queues_cpsch; | ||
1265 | break; | 1508 | break; |
1266 | case KFD_SCHED_POLICY_NO_HWS: | 1509 | case KFD_SCHED_POLICY_NO_HWS: |
1267 | /* initialize dqm for no cp scheduling */ | 1510 | /* initialize dqm for no cp scheduling */ |
@@ -1278,9 +1521,12 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) | |||
1278 | dqm->ops.set_cache_memory_policy = set_cache_memory_policy; | 1521 | dqm->ops.set_cache_memory_policy = set_cache_memory_policy; |
1279 | dqm->ops.set_trap_handler = set_trap_handler; | 1522 | dqm->ops.set_trap_handler = set_trap_handler; |
1280 | dqm->ops.process_termination = process_termination_nocpsch; | 1523 | dqm->ops.process_termination = process_termination_nocpsch; |
1524 | dqm->ops.evict_process_queues = evict_process_queues_nocpsch; | ||
1525 | dqm->ops.restore_process_queues = | ||
1526 | restore_process_queues_nocpsch; | ||
1281 | break; | 1527 | break; |
1282 | default: | 1528 | default: |
1283 | pr_err("Invalid scheduling policy %d\n", sched_policy); | 1529 | pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); |
1284 | goto out_free; | 1530 | goto out_free; |
1285 | } | 1531 | } |
1286 | 1532 | ||
@@ -1292,6 +1538,17 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) | |||
1292 | case CHIP_KAVERI: | 1538 | case CHIP_KAVERI: |
1293 | device_queue_manager_init_cik(&dqm->asic_ops); | 1539 | device_queue_manager_init_cik(&dqm->asic_ops); |
1294 | break; | 1540 | break; |
1541 | |||
1542 | case CHIP_HAWAII: | ||
1543 | device_queue_manager_init_cik_hawaii(&dqm->asic_ops); | ||
1544 | break; | ||
1545 | |||
1546 | case CHIP_TONGA: | ||
1547 | case CHIP_FIJI: | ||
1548 | case CHIP_POLARIS10: | ||
1549 | case CHIP_POLARIS11: | ||
1550 | device_queue_manager_init_vi_tonga(&dqm->asic_ops); | ||
1551 | break; | ||
1295 | default: | 1552 | default: |
1296 | WARN(1, "Unexpected ASIC family %u", | 1553 | WARN(1, "Unexpected ASIC family %u", |
1297 | dev->device_info->asic_family); | 1554 | dev->device_info->asic_family); |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index c61b693bfa8c..412beff3281d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | |||
@@ -79,6 +79,10 @@ struct device_process_node { | |||
79 | * | 79 | * |
80 | * @process_termination: Clears all process queues belongs to that device. | 80 | * @process_termination: Clears all process queues belongs to that device. |
81 | * | 81 | * |
82 | * @evict_process_queues: Evict all active queues of a process | ||
83 | * | ||
84 | * @restore_process_queues: Restore all evicted queues queues of a process | ||
85 | * | ||
82 | */ | 86 | */ |
83 | 87 | ||
84 | struct device_queue_manager_ops { | 88 | struct device_queue_manager_ops { |
@@ -129,6 +133,11 @@ struct device_queue_manager_ops { | |||
129 | 133 | ||
130 | int (*process_termination)(struct device_queue_manager *dqm, | 134 | int (*process_termination)(struct device_queue_manager *dqm, |
131 | struct qcm_process_device *qpd); | 135 | struct qcm_process_device *qpd); |
136 | |||
137 | int (*evict_process_queues)(struct device_queue_manager *dqm, | ||
138 | struct qcm_process_device *qpd); | ||
139 | int (*restore_process_queues)(struct device_queue_manager *dqm, | ||
140 | struct qcm_process_device *qpd); | ||
132 | }; | 141 | }; |
133 | 142 | ||
134 | struct device_queue_manager_asic_ops { | 143 | struct device_queue_manager_asic_ops { |
@@ -180,12 +189,17 @@ struct device_queue_manager { | |||
180 | unsigned int *fence_addr; | 189 | unsigned int *fence_addr; |
181 | struct kfd_mem_obj *fence_mem; | 190 | struct kfd_mem_obj *fence_mem; |
182 | bool active_runlist; | 191 | bool active_runlist; |
192 | int sched_policy; | ||
183 | }; | 193 | }; |
184 | 194 | ||
185 | void device_queue_manager_init_cik( | 195 | void device_queue_manager_init_cik( |
186 | struct device_queue_manager_asic_ops *asic_ops); | 196 | struct device_queue_manager_asic_ops *asic_ops); |
197 | void device_queue_manager_init_cik_hawaii( | ||
198 | struct device_queue_manager_asic_ops *asic_ops); | ||
187 | void device_queue_manager_init_vi( | 199 | void device_queue_manager_init_vi( |
188 | struct device_queue_manager_asic_ops *asic_ops); | 200 | struct device_queue_manager_asic_ops *asic_ops); |
201 | void device_queue_manager_init_vi_tonga( | ||
202 | struct device_queue_manager_asic_ops *asic_ops); | ||
189 | void program_sh_mem_settings(struct device_queue_manager *dqm, | 203 | void program_sh_mem_settings(struct device_queue_manager *dqm, |
190 | struct qcm_process_device *qpd); | 204 | struct qcm_process_device *qpd); |
191 | unsigned int get_queues_num(struct device_queue_manager *dqm); | 205 | unsigned int get_queues_num(struct device_queue_manager *dqm); |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index 28e48c90c596..aed4c21417bf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c | |||
@@ -34,8 +34,13 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, | |||
34 | uint64_t alternate_aperture_size); | 34 | uint64_t alternate_aperture_size); |
35 | static int update_qpd_cik(struct device_queue_manager *dqm, | 35 | static int update_qpd_cik(struct device_queue_manager *dqm, |
36 | struct qcm_process_device *qpd); | 36 | struct qcm_process_device *qpd); |
37 | static int update_qpd_cik_hawaii(struct device_queue_manager *dqm, | ||
38 | struct qcm_process_device *qpd); | ||
37 | static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, | 39 | static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, |
38 | struct qcm_process_device *qpd); | 40 | struct qcm_process_device *qpd); |
41 | static void init_sdma_vm_hawaii(struct device_queue_manager *dqm, | ||
42 | struct queue *q, | ||
43 | struct qcm_process_device *qpd); | ||
39 | 44 | ||
40 | void device_queue_manager_init_cik( | 45 | void device_queue_manager_init_cik( |
41 | struct device_queue_manager_asic_ops *asic_ops) | 46 | struct device_queue_manager_asic_ops *asic_ops) |
@@ -45,6 +50,14 @@ void device_queue_manager_init_cik( | |||
45 | asic_ops->init_sdma_vm = init_sdma_vm; | 50 | asic_ops->init_sdma_vm = init_sdma_vm; |
46 | } | 51 | } |
47 | 52 | ||
53 | void device_queue_manager_init_cik_hawaii( | ||
54 | struct device_queue_manager_asic_ops *asic_ops) | ||
55 | { | ||
56 | asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; | ||
57 | asic_ops->update_qpd = update_qpd_cik_hawaii; | ||
58 | asic_ops->init_sdma_vm = init_sdma_vm_hawaii; | ||
59 | } | ||
60 | |||
48 | static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) | 61 | static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) |
49 | { | 62 | { |
50 | /* In 64-bit mode, we can only control the top 3 bits of the LDS, | 63 | /* In 64-bit mode, we can only control the top 3 bits of the LDS, |
@@ -132,6 +145,36 @@ static int update_qpd_cik(struct device_queue_manager *dqm, | |||
132 | return 0; | 145 | return 0; |
133 | } | 146 | } |
134 | 147 | ||
148 | static int update_qpd_cik_hawaii(struct device_queue_manager *dqm, | ||
149 | struct qcm_process_device *qpd) | ||
150 | { | ||
151 | struct kfd_process_device *pdd; | ||
152 | unsigned int temp; | ||
153 | |||
154 | pdd = qpd_to_pdd(qpd); | ||
155 | |||
156 | /* check if sh_mem_config register already configured */ | ||
157 | if (qpd->sh_mem_config == 0) { | ||
158 | qpd->sh_mem_config = | ||
159 | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | | ||
160 | DEFAULT_MTYPE(MTYPE_NONCACHED) | | ||
161 | APE1_MTYPE(MTYPE_NONCACHED); | ||
162 | qpd->sh_mem_ape1_limit = 0; | ||
163 | qpd->sh_mem_ape1_base = 0; | ||
164 | } | ||
165 | |||
166 | /* On dGPU we're always in GPUVM64 addressing mode with 64-bit | ||
167 | * aperture addresses. | ||
168 | */ | ||
169 | temp = get_sh_mem_bases_nybble_64(pdd); | ||
170 | qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); | ||
171 | |||
172 | pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", | ||
173 | qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); | ||
174 | |||
175 | return 0; | ||
176 | } | ||
177 | |||
135 | static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, | 178 | static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, |
136 | struct qcm_process_device *qpd) | 179 | struct qcm_process_device *qpd) |
137 | { | 180 | { |
@@ -147,3 +190,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, | |||
147 | 190 | ||
148 | q->properties.sdma_vm_addr = value; | 191 | q->properties.sdma_vm_addr = value; |
149 | } | 192 | } |
193 | |||
194 | static void init_sdma_vm_hawaii(struct device_queue_manager *dqm, | ||
195 | struct queue *q, | ||
196 | struct qcm_process_device *qpd) | ||
197 | { | ||
198 | /* On dGPU we're always in GPUVM64 addressing mode with 64-bit | ||
199 | * aperture addresses. | ||
200 | */ | ||
201 | q->properties.sdma_vm_addr = | ||
202 | ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << | ||
203 | SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & | ||
204 | SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK; | ||
205 | } | ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 2fbce57a2f21..fd60a116be37 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c | |||
@@ -33,10 +33,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, | |||
33 | enum cache_policy alternate_policy, | 33 | enum cache_policy alternate_policy, |
34 | void __user *alternate_aperture_base, | 34 | void __user *alternate_aperture_base, |
35 | uint64_t alternate_aperture_size); | 35 | uint64_t alternate_aperture_size); |
36 | static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, | ||
37 | struct qcm_process_device *qpd, | ||
38 | enum cache_policy default_policy, | ||
39 | enum cache_policy alternate_policy, | ||
40 | void __user *alternate_aperture_base, | ||
41 | uint64_t alternate_aperture_size); | ||
36 | static int update_qpd_vi(struct device_queue_manager *dqm, | 42 | static int update_qpd_vi(struct device_queue_manager *dqm, |
37 | struct qcm_process_device *qpd); | 43 | struct qcm_process_device *qpd); |
44 | static int update_qpd_vi_tonga(struct device_queue_manager *dqm, | ||
45 | struct qcm_process_device *qpd); | ||
38 | static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, | 46 | static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, |
39 | struct qcm_process_device *qpd); | 47 | struct qcm_process_device *qpd); |
48 | static void init_sdma_vm_tonga(struct device_queue_manager *dqm, | ||
49 | struct queue *q, | ||
50 | struct qcm_process_device *qpd); | ||
40 | 51 | ||
41 | void device_queue_manager_init_vi( | 52 | void device_queue_manager_init_vi( |
42 | struct device_queue_manager_asic_ops *asic_ops) | 53 | struct device_queue_manager_asic_ops *asic_ops) |
@@ -46,6 +57,14 @@ void device_queue_manager_init_vi( | |||
46 | asic_ops->init_sdma_vm = init_sdma_vm; | 57 | asic_ops->init_sdma_vm = init_sdma_vm; |
47 | } | 58 | } |
48 | 59 | ||
60 | void device_queue_manager_init_vi_tonga( | ||
61 | struct device_queue_manager_asic_ops *asic_ops) | ||
62 | { | ||
63 | asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga; | ||
64 | asic_ops->update_qpd = update_qpd_vi_tonga; | ||
65 | asic_ops->init_sdma_vm = init_sdma_vm_tonga; | ||
66 | } | ||
67 | |||
49 | static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) | 68 | static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) |
50 | { | 69 | { |
51 | /* In 64-bit mode, we can only control the top 3 bits of the LDS, | 70 | /* In 64-bit mode, we can only control the top 3 bits of the LDS, |
@@ -103,6 +122,33 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, | |||
103 | return true; | 122 | return true; |
104 | } | 123 | } |
105 | 124 | ||
125 | static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, | ||
126 | struct qcm_process_device *qpd, | ||
127 | enum cache_policy default_policy, | ||
128 | enum cache_policy alternate_policy, | ||
129 | void __user *alternate_aperture_base, | ||
130 | uint64_t alternate_aperture_size) | ||
131 | { | ||
132 | uint32_t default_mtype; | ||
133 | uint32_t ape1_mtype; | ||
134 | |||
135 | default_mtype = (default_policy == cache_policy_coherent) ? | ||
136 | MTYPE_UC : | ||
137 | MTYPE_NC; | ||
138 | |||
139 | ape1_mtype = (alternate_policy == cache_policy_coherent) ? | ||
140 | MTYPE_UC : | ||
141 | MTYPE_NC; | ||
142 | |||
143 | qpd->sh_mem_config = | ||
144 | SH_MEM_ALIGNMENT_MODE_UNALIGNED << | ||
145 | SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | | ||
146 | default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | | ||
147 | ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT; | ||
148 | |||
149 | return true; | ||
150 | } | ||
151 | |||
106 | static int update_qpd_vi(struct device_queue_manager *dqm, | 152 | static int update_qpd_vi(struct device_queue_manager *dqm, |
107 | struct qcm_process_device *qpd) | 153 | struct qcm_process_device *qpd) |
108 | { | 154 | { |
@@ -144,6 +190,40 @@ static int update_qpd_vi(struct device_queue_manager *dqm, | |||
144 | return 0; | 190 | return 0; |
145 | } | 191 | } |
146 | 192 | ||
193 | static int update_qpd_vi_tonga(struct device_queue_manager *dqm, | ||
194 | struct qcm_process_device *qpd) | ||
195 | { | ||
196 | struct kfd_process_device *pdd; | ||
197 | unsigned int temp; | ||
198 | |||
199 | pdd = qpd_to_pdd(qpd); | ||
200 | |||
201 | /* check if sh_mem_config register already configured */ | ||
202 | if (qpd->sh_mem_config == 0) { | ||
203 | qpd->sh_mem_config = | ||
204 | SH_MEM_ALIGNMENT_MODE_UNALIGNED << | ||
205 | SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | | ||
206 | MTYPE_UC << | ||
207 | SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | | ||
208 | MTYPE_UC << | ||
209 | SH_MEM_CONFIG__APE1_MTYPE__SHIFT; | ||
210 | |||
211 | qpd->sh_mem_ape1_limit = 0; | ||
212 | qpd->sh_mem_ape1_base = 0; | ||
213 | } | ||
214 | |||
215 | /* On dGPU we're always in GPUVM64 addressing mode with 64-bit | ||
216 | * aperture addresses. | ||
217 | */ | ||
218 | temp = get_sh_mem_bases_nybble_64(pdd); | ||
219 | qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); | ||
220 | |||
221 | pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n", | ||
222 | temp, qpd->sh_mem_bases); | ||
223 | |||
224 | return 0; | ||
225 | } | ||
226 | |||
147 | static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, | 227 | static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, |
148 | struct qcm_process_device *qpd) | 228 | struct qcm_process_device *qpd) |
149 | { | 229 | { |
@@ -159,3 +239,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, | |||
159 | 239 | ||
160 | q->properties.sdma_vm_addr = value; | 240 | q->properties.sdma_vm_addr = value; |
161 | } | 241 | } |
242 | |||
243 | static void init_sdma_vm_tonga(struct device_queue_manager *dqm, | ||
244 | struct queue *q, | ||
245 | struct qcm_process_device *qpd) | ||
246 | { | ||
247 | /* On dGPU we're always in GPUVM64 addressing mode with 64-bit | ||
248 | * aperture addresses. | ||
249 | */ | ||
250 | q->properties.sdma_vm_addr = | ||
251 | ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << | ||
252 | SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & | ||
253 | SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK; | ||
254 | } | ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 93aae5c1e78b..6fb9c0d46d63 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/memory.h> | 30 | #include <linux/memory.h> |
31 | #include "kfd_priv.h" | 31 | #include "kfd_priv.h" |
32 | #include "kfd_events.h" | 32 | #include "kfd_events.h" |
33 | #include "kfd_iommu.h" | ||
33 | #include <linux/device.h> | 34 | #include <linux/device.h> |
34 | 35 | ||
35 | /* | 36 | /* |
@@ -837,6 +838,7 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, | |||
837 | } | 838 | } |
838 | } | 839 | } |
839 | 840 | ||
841 | #ifdef KFD_SUPPORT_IOMMU_V2 | ||
840 | void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, | 842 | void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, |
841 | unsigned long address, bool is_write_requested, | 843 | unsigned long address, bool is_write_requested, |
842 | bool is_execute_requested) | 844 | bool is_execute_requested) |
@@ -905,6 +907,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, | |||
905 | mutex_unlock(&p->event_mutex); | 907 | mutex_unlock(&p->event_mutex); |
906 | kfd_unref_process(p); | 908 | kfd_unref_process(p); |
907 | } | 909 | } |
910 | #endif /* KFD_SUPPORT_IOMMU_V2 */ | ||
908 | 911 | ||
909 | void kfd_signal_hw_exception_event(unsigned int pasid) | 912 | void kfd_signal_hw_exception_event(unsigned int pasid) |
910 | { | 913 | { |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c new file mode 100644 index 000000000000..c71817963eea --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | |||
@@ -0,0 +1,357 @@ | |||
1 | /* | ||
2 | * Copyright 2018 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <linux/printk.h> | ||
24 | #include <linux/device.h> | ||
25 | #include <linux/slab.h> | ||
26 | #include <linux/pci.h> | ||
27 | #include <linux/amd-iommu.h> | ||
28 | #include "kfd_priv.h" | ||
29 | #include "kfd_dbgmgr.h" | ||
30 | #include "kfd_topology.h" | ||
31 | #include "kfd_iommu.h" | ||
32 | |||
33 | static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | | ||
34 | AMD_IOMMU_DEVICE_FLAG_PRI_SUP | | ||
35 | AMD_IOMMU_DEVICE_FLAG_PASID_SUP; | ||
36 | |||
37 | /** kfd_iommu_check_device - Check whether IOMMU is available for device | ||
38 | */ | ||
39 | int kfd_iommu_check_device(struct kfd_dev *kfd) | ||
40 | { | ||
41 | struct amd_iommu_device_info iommu_info; | ||
42 | int err; | ||
43 | |||
44 | if (!kfd->device_info->needs_iommu_device) | ||
45 | return -ENODEV; | ||
46 | |||
47 | iommu_info.flags = 0; | ||
48 | err = amd_iommu_device_info(kfd->pdev, &iommu_info); | ||
49 | if (err) | ||
50 | return err; | ||
51 | |||
52 | if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) | ||
53 | return -ENODEV; | ||
54 | |||
55 | return 0; | ||
56 | } | ||
57 | |||
58 | /** kfd_iommu_device_init - Initialize IOMMU for device | ||
59 | */ | ||
60 | int kfd_iommu_device_init(struct kfd_dev *kfd) | ||
61 | { | ||
62 | struct amd_iommu_device_info iommu_info; | ||
63 | unsigned int pasid_limit; | ||
64 | int err; | ||
65 | |||
66 | if (!kfd->device_info->needs_iommu_device) | ||
67 | return 0; | ||
68 | |||
69 | iommu_info.flags = 0; | ||
70 | err = amd_iommu_device_info(kfd->pdev, &iommu_info); | ||
71 | if (err < 0) { | ||
72 | dev_err(kfd_device, | ||
73 | "error getting iommu info. is the iommu enabled?\n"); | ||
74 | return -ENODEV; | ||
75 | } | ||
76 | |||
77 | if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { | ||
78 | dev_err(kfd_device, | ||
79 | "error required iommu flags ats %i, pri %i, pasid %i\n", | ||
80 | (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, | ||
81 | (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, | ||
82 | (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) | ||
83 | != 0); | ||
84 | return -ENODEV; | ||
85 | } | ||
86 | |||
87 | pasid_limit = min_t(unsigned int, | ||
88 | (unsigned int)(1 << kfd->device_info->max_pasid_bits), | ||
89 | iommu_info.max_pasids); | ||
90 | |||
91 | if (!kfd_set_pasid_limit(pasid_limit)) { | ||
92 | dev_err(kfd_device, "error setting pasid limit\n"); | ||
93 | return -EBUSY; | ||
94 | } | ||
95 | |||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | /** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process | ||
100 | * | ||
101 | * Binds the given process to the given device using its PASID. This | ||
102 | * enables IOMMUv2 address translation for the process on the device. | ||
103 | * | ||
104 | * This function assumes that the process mutex is held. | ||
105 | */ | ||
106 | int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd) | ||
107 | { | ||
108 | struct kfd_dev *dev = pdd->dev; | ||
109 | struct kfd_process *p = pdd->process; | ||
110 | int err; | ||
111 | |||
112 | if (!dev->device_info->needs_iommu_device || pdd->bound == PDD_BOUND) | ||
113 | return 0; | ||
114 | |||
115 | if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) { | ||
116 | pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n"); | ||
117 | return -EINVAL; | ||
118 | } | ||
119 | |||
120 | err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); | ||
121 | if (!err) | ||
122 | pdd->bound = PDD_BOUND; | ||
123 | |||
124 | return err; | ||
125 | } | ||
126 | |||
127 | /** kfd_iommu_unbind_process - Unbind process from all devices | ||
128 | * | ||
129 | * This removes all IOMMU device bindings of the process. To be used | ||
130 | * before process termination. | ||
131 | */ | ||
132 | void kfd_iommu_unbind_process(struct kfd_process *p) | ||
133 | { | ||
134 | struct kfd_process_device *pdd; | ||
135 | |||
136 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) | ||
137 | if (pdd->bound == PDD_BOUND) | ||
138 | amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); | ||
139 | } | ||
140 | |||
141 | /* Callback for process shutdown invoked by the IOMMU driver */ | ||
142 | static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) | ||
143 | { | ||
144 | struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); | ||
145 | struct kfd_process *p; | ||
146 | struct kfd_process_device *pdd; | ||
147 | |||
148 | if (!dev) | ||
149 | return; | ||
150 | |||
151 | /* | ||
152 | * Look for the process that matches the pasid. If there is no such | ||
153 | * process, we either released it in amdkfd's own notifier, or there | ||
154 | * is a bug. Unfortunately, there is no way to tell... | ||
155 | */ | ||
156 | p = kfd_lookup_process_by_pasid(pasid); | ||
157 | if (!p) | ||
158 | return; | ||
159 | |||
160 | pr_debug("Unbinding process %d from IOMMU\n", pasid); | ||
161 | |||
162 | mutex_lock(kfd_get_dbgmgr_mutex()); | ||
163 | |||
164 | if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { | ||
165 | if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { | ||
166 | kfd_dbgmgr_destroy(dev->dbgmgr); | ||
167 | dev->dbgmgr = NULL; | ||
168 | } | ||
169 | } | ||
170 | |||
171 | mutex_unlock(kfd_get_dbgmgr_mutex()); | ||
172 | |||
173 | mutex_lock(&p->mutex); | ||
174 | |||
175 | pdd = kfd_get_process_device_data(dev, p); | ||
176 | if (pdd) | ||
177 | /* For GPU relying on IOMMU, we need to dequeue here | ||
178 | * when PASID is still bound. | ||
179 | */ | ||
180 | kfd_process_dequeue_from_device(pdd); | ||
181 | |||
182 | mutex_unlock(&p->mutex); | ||
183 | |||
184 | kfd_unref_process(p); | ||
185 | } | ||
186 | |||
187 | /* This function called by IOMMU driver on PPR failure */ | ||
188 | static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, | ||
189 | unsigned long address, u16 flags) | ||
190 | { | ||
191 | struct kfd_dev *dev; | ||
192 | |||
193 | dev_warn(kfd_device, | ||
194 | "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", | ||
195 | PCI_BUS_NUM(pdev->devfn), | ||
196 | PCI_SLOT(pdev->devfn), | ||
197 | PCI_FUNC(pdev->devfn), | ||
198 | pasid, | ||
199 | address, | ||
200 | flags); | ||
201 | |||
202 | dev = kfd_device_by_pci_dev(pdev); | ||
203 | if (!WARN_ON(!dev)) | ||
204 | kfd_signal_iommu_event(dev, pasid, address, | ||
205 | flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); | ||
206 | |||
207 | return AMD_IOMMU_INV_PRI_RSP_INVALID; | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * Bind processes do the device that have been temporarily unbound | ||
212 | * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device. | ||
213 | */ | ||
214 | static int kfd_bind_processes_to_device(struct kfd_dev *kfd) | ||
215 | { | ||
216 | struct kfd_process_device *pdd; | ||
217 | struct kfd_process *p; | ||
218 | unsigned int temp; | ||
219 | int err = 0; | ||
220 | |||
221 | int idx = srcu_read_lock(&kfd_processes_srcu); | ||
222 | |||
223 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { | ||
224 | mutex_lock(&p->mutex); | ||
225 | pdd = kfd_get_process_device_data(kfd, p); | ||
226 | |||
227 | if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) { | ||
228 | mutex_unlock(&p->mutex); | ||
229 | continue; | ||
230 | } | ||
231 | |||
232 | err = amd_iommu_bind_pasid(kfd->pdev, p->pasid, | ||
233 | p->lead_thread); | ||
234 | if (err < 0) { | ||
235 | pr_err("Unexpected pasid %d binding failure\n", | ||
236 | p->pasid); | ||
237 | mutex_unlock(&p->mutex); | ||
238 | break; | ||
239 | } | ||
240 | |||
241 | pdd->bound = PDD_BOUND; | ||
242 | mutex_unlock(&p->mutex); | ||
243 | } | ||
244 | |||
245 | srcu_read_unlock(&kfd_processes_srcu, idx); | ||
246 | |||
247 | return err; | ||
248 | } | ||
249 | |||
250 | /* | ||
251 | * Mark currently bound processes as PDD_BOUND_SUSPENDED. These | ||
252 | * processes will be restored to PDD_BOUND state in | ||
253 | * kfd_bind_processes_to_device. | ||
254 | */ | ||
255 | static void kfd_unbind_processes_from_device(struct kfd_dev *kfd) | ||
256 | { | ||
257 | struct kfd_process_device *pdd; | ||
258 | struct kfd_process *p; | ||
259 | unsigned int temp; | ||
260 | |||
261 | int idx = srcu_read_lock(&kfd_processes_srcu); | ||
262 | |||
263 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { | ||
264 | mutex_lock(&p->mutex); | ||
265 | pdd = kfd_get_process_device_data(kfd, p); | ||
266 | |||
267 | if (WARN_ON(!pdd)) { | ||
268 | mutex_unlock(&p->mutex); | ||
269 | continue; | ||
270 | } | ||
271 | |||
272 | if (pdd->bound == PDD_BOUND) | ||
273 | pdd->bound = PDD_BOUND_SUSPENDED; | ||
274 | mutex_unlock(&p->mutex); | ||
275 | } | ||
276 | |||
277 | srcu_read_unlock(&kfd_processes_srcu, idx); | ||
278 | } | ||
279 | |||
280 | /** kfd_iommu_suspend - Prepare IOMMU for suspend | ||
281 | * | ||
282 | * This unbinds processes from the device and disables the IOMMU for | ||
283 | * the device. | ||
284 | */ | ||
285 | void kfd_iommu_suspend(struct kfd_dev *kfd) | ||
286 | { | ||
287 | if (!kfd->device_info->needs_iommu_device) | ||
288 | return; | ||
289 | |||
290 | kfd_unbind_processes_from_device(kfd); | ||
291 | |||
292 | amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); | ||
293 | amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); | ||
294 | amd_iommu_free_device(kfd->pdev); | ||
295 | } | ||
296 | |||
297 | /** kfd_iommu_resume - Restore IOMMU after resume | ||
298 | * | ||
299 | * This reinitializes the IOMMU for the device and re-binds previously | ||
300 | * suspended processes to the device. | ||
301 | */ | ||
302 | int kfd_iommu_resume(struct kfd_dev *kfd) | ||
303 | { | ||
304 | unsigned int pasid_limit; | ||
305 | int err; | ||
306 | |||
307 | if (!kfd->device_info->needs_iommu_device) | ||
308 | return 0; | ||
309 | |||
310 | pasid_limit = kfd_get_pasid_limit(); | ||
311 | |||
312 | err = amd_iommu_init_device(kfd->pdev, pasid_limit); | ||
313 | if (err) | ||
314 | return -ENXIO; | ||
315 | |||
316 | amd_iommu_set_invalidate_ctx_cb(kfd->pdev, | ||
317 | iommu_pasid_shutdown_callback); | ||
318 | amd_iommu_set_invalid_ppr_cb(kfd->pdev, | ||
319 | iommu_invalid_ppr_cb); | ||
320 | |||
321 | err = kfd_bind_processes_to_device(kfd); | ||
322 | if (err) { | ||
323 | amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); | ||
324 | amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); | ||
325 | amd_iommu_free_device(kfd->pdev); | ||
326 | return err; | ||
327 | } | ||
328 | |||
329 | return 0; | ||
330 | } | ||
331 | |||
332 | extern bool amd_iommu_pc_supported(void); | ||
333 | extern u8 amd_iommu_pc_get_max_banks(u16 devid); | ||
334 | extern u8 amd_iommu_pc_get_max_counters(u16 devid); | ||
335 | |||
336 | /** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology | ||
337 | */ | ||
338 | int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev) | ||
339 | { | ||
340 | struct kfd_perf_properties *props; | ||
341 | |||
342 | if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT)) | ||
343 | return 0; | ||
344 | |||
345 | if (!amd_iommu_pc_supported()) | ||
346 | return 0; | ||
347 | |||
348 | props = kfd_alloc_struct(props); | ||
349 | if (!props) | ||
350 | return -ENOMEM; | ||
351 | strcpy(props->block_name, "iommu"); | ||
352 | props->max_concurrent = amd_iommu_pc_get_max_banks(0) * | ||
353 | amd_iommu_pc_get_max_counters(0); /* assume one iommu */ | ||
354 | list_add_tail(&props->list, &kdev->perf_props); | ||
355 | |||
356 | return 0; | ||
357 | } | ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h new file mode 100644 index 000000000000..dd23d9fdf6a8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h | |||
@@ -0,0 +1,78 @@ | |||
1 | /* | ||
2 | * Copyright 2018 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #ifndef __KFD_IOMMU_H__ | ||
24 | #define __KFD_IOMMU_H__ | ||
25 | |||
26 | #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) | ||
27 | |||
28 | #define KFD_SUPPORT_IOMMU_V2 | ||
29 | |||
30 | int kfd_iommu_check_device(struct kfd_dev *kfd); | ||
31 | int kfd_iommu_device_init(struct kfd_dev *kfd); | ||
32 | |||
33 | int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd); | ||
34 | void kfd_iommu_unbind_process(struct kfd_process *p); | ||
35 | |||
36 | void kfd_iommu_suspend(struct kfd_dev *kfd); | ||
37 | int kfd_iommu_resume(struct kfd_dev *kfd); | ||
38 | |||
39 | int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev); | ||
40 | |||
41 | #else | ||
42 | |||
43 | static inline int kfd_iommu_check_device(struct kfd_dev *kfd) | ||
44 | { | ||
45 | return -ENODEV; | ||
46 | } | ||
47 | static inline int kfd_iommu_device_init(struct kfd_dev *kfd) | ||
48 | { | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | static inline int kfd_iommu_bind_process_to_device( | ||
53 | struct kfd_process_device *pdd) | ||
54 | { | ||
55 | return 0; | ||
56 | } | ||
57 | static inline void kfd_iommu_unbind_process(struct kfd_process *p) | ||
58 | { | ||
59 | /* empty */ | ||
60 | } | ||
61 | |||
62 | static inline void kfd_iommu_suspend(struct kfd_dev *kfd) | ||
63 | { | ||
64 | /* empty */ | ||
65 | } | ||
66 | static inline int kfd_iommu_resume(struct kfd_dev *kfd) | ||
67 | { | ||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | static inline int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev) | ||
72 | { | ||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | #endif /* defined(CONFIG_AMD_IOMMU_V2) */ | ||
77 | |||
78 | #endif /* __KFD_IOMMU_H__ */ | ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 5dc6567d4a13..69f496485331 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | |||
@@ -297,10 +297,15 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, | |||
297 | 297 | ||
298 | switch (dev->device_info->asic_family) { | 298 | switch (dev->device_info->asic_family) { |
299 | case CHIP_CARRIZO: | 299 | case CHIP_CARRIZO: |
300 | case CHIP_TONGA: | ||
301 | case CHIP_FIJI: | ||
302 | case CHIP_POLARIS10: | ||
303 | case CHIP_POLARIS11: | ||
300 | kernel_queue_init_vi(&kq->ops_asic_specific); | 304 | kernel_queue_init_vi(&kq->ops_asic_specific); |
301 | break; | 305 | break; |
302 | 306 | ||
303 | case CHIP_KAVERI: | 307 | case CHIP_KAVERI: |
308 | case CHIP_HAWAII: | ||
304 | kernel_queue_init_cik(&kq->ops_asic_specific); | 309 | kernel_queue_init_cik(&kq->ops_asic_specific); |
305 | break; | 310 | break; |
306 | default: | 311 | default: |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 3ac72bed4f31..65574c6a10b3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c | |||
@@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = { | |||
43 | .interrupt = kgd2kfd_interrupt, | 43 | .interrupt = kgd2kfd_interrupt, |
44 | .suspend = kgd2kfd_suspend, | 44 | .suspend = kgd2kfd_suspend, |
45 | .resume = kgd2kfd_resume, | 45 | .resume = kgd2kfd_resume, |
46 | .schedule_evict_and_restore_process = | ||
47 | kgd2kfd_schedule_evict_and_restore_process, | ||
46 | }; | 48 | }; |
47 | 49 | ||
48 | int sched_policy = KFD_SCHED_POLICY_HWS; | 50 | int sched_policy = KFD_SCHED_POLICY_HWS; |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index dfd260ef81ff..ee7061e1c466 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | |||
@@ -29,8 +29,15 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, | |||
29 | switch (dev->device_info->asic_family) { | 29 | switch (dev->device_info->asic_family) { |
30 | case CHIP_KAVERI: | 30 | case CHIP_KAVERI: |
31 | return mqd_manager_init_cik(type, dev); | 31 | return mqd_manager_init_cik(type, dev); |
32 | case CHIP_HAWAII: | ||
33 | return mqd_manager_init_cik_hawaii(type, dev); | ||
32 | case CHIP_CARRIZO: | 34 | case CHIP_CARRIZO: |
33 | return mqd_manager_init_vi(type, dev); | 35 | return mqd_manager_init_vi(type, dev); |
36 | case CHIP_TONGA: | ||
37 | case CHIP_FIJI: | ||
38 | case CHIP_POLARIS10: | ||
39 | case CHIP_POLARIS11: | ||
40 | return mqd_manager_init_vi_tonga(type, dev); | ||
34 | default: | 41 | default: |
35 | WARN(1, "Unexpected ASIC family %u", | 42 | WARN(1, "Unexpected ASIC family %u", |
36 | dev->device_info->asic_family); | 43 | dev->device_info->asic_family); |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index f8ef4a051e08..c00c325ed3c9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | |||
@@ -170,14 +170,19 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, | |||
170 | mms); | 170 | mms); |
171 | } | 171 | } |
172 | 172 | ||
173 | static int update_mqd(struct mqd_manager *mm, void *mqd, | 173 | static int __update_mqd(struct mqd_manager *mm, void *mqd, |
174 | struct queue_properties *q) | 174 | struct queue_properties *q, unsigned int atc_bit) |
175 | { | 175 | { |
176 | struct cik_mqd *m; | 176 | struct cik_mqd *m; |
177 | 177 | ||
178 | m = get_mqd(mqd); | 178 | m = get_mqd(mqd); |
179 | m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | | 179 | m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | |
180 | DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN; | 180 | DEFAULT_MIN_AVAIL_SIZE; |
181 | m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; | ||
182 | if (atc_bit) { | ||
183 | m->cp_hqd_pq_control |= PQ_ATC_EN; | ||
184 | m->cp_hqd_ib_control |= IB_ATC_EN; | ||
185 | } | ||
181 | 186 | ||
182 | /* | 187 | /* |
183 | * Calculating queue size which is log base 2 of actual queue size -1 | 188 | * Calculating queue size which is log base 2 of actual queue size -1 |
@@ -197,11 +202,24 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, | |||
197 | 202 | ||
198 | q->is_active = (q->queue_size > 0 && | 203 | q->is_active = (q->queue_size > 0 && |
199 | q->queue_address != 0 && | 204 | q->queue_address != 0 && |
200 | q->queue_percent > 0); | 205 | q->queue_percent > 0 && |
206 | !q->is_evicted); | ||
201 | 207 | ||
202 | return 0; | 208 | return 0; |
203 | } | 209 | } |
204 | 210 | ||
211 | static int update_mqd(struct mqd_manager *mm, void *mqd, | ||
212 | struct queue_properties *q) | ||
213 | { | ||
214 | return __update_mqd(mm, mqd, q, 1); | ||
215 | } | ||
216 | |||
217 | static int update_mqd_hawaii(struct mqd_manager *mm, void *mqd, | ||
218 | struct queue_properties *q) | ||
219 | { | ||
220 | return __update_mqd(mm, mqd, q, 0); | ||
221 | } | ||
222 | |||
205 | static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, | 223 | static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, |
206 | struct queue_properties *q) | 224 | struct queue_properties *q) |
207 | { | 225 | { |
@@ -228,7 +246,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, | |||
228 | 246 | ||
229 | q->is_active = (q->queue_size > 0 && | 247 | q->is_active = (q->queue_size > 0 && |
230 | q->queue_address != 0 && | 248 | q->queue_address != 0 && |
231 | q->queue_percent > 0); | 249 | q->queue_percent > 0 && |
250 | !q->is_evicted); | ||
232 | 251 | ||
233 | return 0; | 252 | return 0; |
234 | } | 253 | } |
@@ -360,7 +379,8 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, | |||
360 | 379 | ||
361 | q->is_active = (q->queue_size > 0 && | 380 | q->is_active = (q->queue_size > 0 && |
362 | q->queue_address != 0 && | 381 | q->queue_address != 0 && |
363 | q->queue_percent > 0); | 382 | q->queue_percent > 0 && |
383 | !q->is_evicted); | ||
364 | 384 | ||
365 | return 0; | 385 | return 0; |
366 | } | 386 | } |
@@ -441,3 +461,15 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, | |||
441 | return mqd; | 461 | return mqd; |
442 | } | 462 | } |
443 | 463 | ||
464 | struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, | ||
465 | struct kfd_dev *dev) | ||
466 | { | ||
467 | struct mqd_manager *mqd; | ||
468 | |||
469 | mqd = mqd_manager_init_cik(type, dev); | ||
470 | if (!mqd) | ||
471 | return NULL; | ||
472 | if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE)) | ||
473 | mqd->update_mqd = update_mqd_hawaii; | ||
474 | return mqd; | ||
475 | } | ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 971aec0637dc..89e4242e43e7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | |||
@@ -151,6 +151,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, | |||
151 | 151 | ||
152 | m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); | 152 | m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); |
153 | m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); | 153 | m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); |
154 | m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr); | ||
155 | m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr); | ||
154 | 156 | ||
155 | m->cp_hqd_pq_doorbell_control = | 157 | m->cp_hqd_pq_doorbell_control = |
156 | q->doorbell_off << | 158 | q->doorbell_off << |
@@ -196,7 +198,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, | |||
196 | 198 | ||
197 | q->is_active = (q->queue_size > 0 && | 199 | q->is_active = (q->queue_size > 0 && |
198 | q->queue_address != 0 && | 200 | q->queue_address != 0 && |
199 | q->queue_percent > 0); | 201 | q->queue_percent > 0 && |
202 | !q->is_evicted); | ||
200 | 203 | ||
201 | return 0; | 204 | return 0; |
202 | } | 205 | } |
@@ -208,6 +211,12 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, | |||
208 | return __update_mqd(mm, mqd, q, MTYPE_CC, 1); | 211 | return __update_mqd(mm, mqd, q, MTYPE_CC, 1); |
209 | } | 212 | } |
210 | 213 | ||
214 | static int update_mqd_tonga(struct mqd_manager *mm, void *mqd, | ||
215 | struct queue_properties *q) | ||
216 | { | ||
217 | return __update_mqd(mm, mqd, q, MTYPE_UC, 0); | ||
218 | } | ||
219 | |||
211 | static int destroy_mqd(struct mqd_manager *mm, void *mqd, | 220 | static int destroy_mqd(struct mqd_manager *mm, void *mqd, |
212 | enum kfd_preempt_type type, | 221 | enum kfd_preempt_type type, |
213 | unsigned int timeout, uint32_t pipe_id, | 222 | unsigned int timeout, uint32_t pipe_id, |
@@ -334,7 +343,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, | |||
334 | 343 | ||
335 | q->is_active = (q->queue_size > 0 && | 344 | q->is_active = (q->queue_size > 0 && |
336 | q->queue_address != 0 && | 345 | q->queue_address != 0 && |
337 | q->queue_percent > 0); | 346 | q->queue_percent > 0 && |
347 | !q->is_evicted); | ||
338 | 348 | ||
339 | return 0; | 349 | return 0; |
340 | } | 350 | } |
@@ -432,3 +442,16 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, | |||
432 | 442 | ||
433 | return mqd; | 443 | return mqd; |
434 | } | 444 | } |
445 | |||
446 | struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, | ||
447 | struct kfd_dev *dev) | ||
448 | { | ||
449 | struct mqd_manager *mqd; | ||
450 | |||
451 | mqd = mqd_manager_init_vi(type, dev); | ||
452 | if (!mqd) | ||
453 | return NULL; | ||
454 | if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE)) | ||
455 | mqd->update_mqd = update_mqd_tonga; | ||
456 | return mqd; | ||
457 | } | ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 0bedcf9cc08c..cac7aa258162 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h | |||
@@ -158,6 +158,8 @@ struct kfd_device_info { | |||
158 | uint8_t num_of_watch_points; | 158 | uint8_t num_of_watch_points; |
159 | uint16_t mqd_size_aligned; | 159 | uint16_t mqd_size_aligned; |
160 | bool supports_cwsr; | 160 | bool supports_cwsr; |
161 | bool needs_iommu_device; | ||
162 | bool needs_pci_atomics; | ||
161 | }; | 163 | }; |
162 | 164 | ||
163 | struct kfd_mem_obj { | 165 | struct kfd_mem_obj { |
@@ -333,7 +335,11 @@ enum kfd_queue_format { | |||
333 | * @is_interop: Defines if this is a interop queue. Interop queue means that | 335 | * @is_interop: Defines if this is a interop queue. Interop queue means that |
334 | * the queue can access both graphics and compute resources. | 336 | * the queue can access both graphics and compute resources. |
335 | * | 337 | * |
336 | * @is_active: Defines if the queue is active or not. | 338 | * @is_evicted: Defines if the queue is evicted. Only active queues |
339 | * are evicted, rendering them inactive. | ||
340 | * | ||
341 | * @is_active: Defines if the queue is active or not. @is_active and | ||
342 | * @is_evicted are protected by the DQM lock. | ||
337 | * | 343 | * |
338 | * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid | 344 | * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid |
339 | * of the queue. | 345 | * of the queue. |
@@ -355,6 +361,7 @@ struct queue_properties { | |||
355 | uint32_t __iomem *doorbell_ptr; | 361 | uint32_t __iomem *doorbell_ptr; |
356 | uint32_t doorbell_off; | 362 | uint32_t doorbell_off; |
357 | bool is_interop; | 363 | bool is_interop; |
364 | bool is_evicted; | ||
358 | bool is_active; | 365 | bool is_active; |
359 | /* Not relevant for user mode queues in cp scheduling */ | 366 | /* Not relevant for user mode queues in cp scheduling */ |
360 | unsigned int vmid; | 367 | unsigned int vmid; |
@@ -458,6 +465,7 @@ struct qcm_process_device { | |||
458 | unsigned int queue_count; | 465 | unsigned int queue_count; |
459 | unsigned int vmid; | 466 | unsigned int vmid; |
460 | bool is_debug; | 467 | bool is_debug; |
468 | unsigned int evicted; /* eviction counter, 0=active */ | ||
461 | 469 | ||
462 | /* This flag tells if we should reset all wavefronts on | 470 | /* This flag tells if we should reset all wavefronts on |
463 | * process termination | 471 | * process termination |
@@ -484,6 +492,17 @@ struct qcm_process_device { | |||
484 | uint64_t tma_addr; | 492 | uint64_t tma_addr; |
485 | }; | 493 | }; |
486 | 494 | ||
495 | /* KFD Memory Eviction */ | ||
496 | |||
497 | /* Approx. wait time before attempting to restore evicted BOs */ | ||
498 | #define PROCESS_RESTORE_TIME_MS 100 | ||
499 | /* Approx. back off time if restore fails due to lack of memory */ | ||
500 | #define PROCESS_BACK_OFF_TIME_MS 100 | ||
501 | /* Approx. time before evicting the process again */ | ||
502 | #define PROCESS_ACTIVE_TIME_MS 10 | ||
503 | |||
504 | int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, | ||
505 | struct dma_fence *fence); | ||
487 | 506 | ||
488 | enum kfd_pdd_bound { | 507 | enum kfd_pdd_bound { |
489 | PDD_UNBOUND = 0, | 508 | PDD_UNBOUND = 0, |
@@ -516,8 +535,8 @@ struct kfd_process_device { | |||
516 | uint64_t scratch_base; | 535 | uint64_t scratch_base; |
517 | uint64_t scratch_limit; | 536 | uint64_t scratch_limit; |
518 | 537 | ||
519 | /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ | 538 | /* VM context for GPUVM allocations */ |
520 | enum kfd_pdd_bound bound; | 539 | void *vm; |
521 | 540 | ||
522 | /* Flag used to tell the pdd has dequeued from the dqm. | 541 | /* Flag used to tell the pdd has dequeued from the dqm. |
523 | * This is used to prevent dev->dqm->ops.process_termination() from | 542 | * This is used to prevent dev->dqm->ops.process_termination() from |
@@ -525,6 +544,9 @@ struct kfd_process_device { | |||
525 | * function. | 544 | * function. |
526 | */ | 545 | */ |
527 | bool already_dequeued; | 546 | bool already_dequeued; |
547 | |||
548 | /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ | ||
549 | enum kfd_pdd_bound bound; | ||
528 | }; | 550 | }; |
529 | 551 | ||
530 | #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) | 552 | #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) |
@@ -587,8 +609,30 @@ struct kfd_process { | |||
587 | size_t signal_mapped_size; | 609 | size_t signal_mapped_size; |
588 | size_t signal_event_count; | 610 | size_t signal_event_count; |
589 | bool signal_event_limit_reached; | 611 | bool signal_event_limit_reached; |
612 | |||
613 | /* Information used for memory eviction */ | ||
614 | void *kgd_process_info; | ||
615 | /* Eviction fence that is attached to all the BOs of this process. The | ||
616 | * fence will be triggered during eviction and new one will be created | ||
617 | * during restore | ||
618 | */ | ||
619 | struct dma_fence *ef; | ||
620 | |||
621 | /* Work items for evicting and restoring BOs */ | ||
622 | struct delayed_work eviction_work; | ||
623 | struct delayed_work restore_work; | ||
624 | /* seqno of the last scheduled eviction */ | ||
625 | unsigned int last_eviction_seqno; | ||
626 | /* Approx. the last timestamp (in jiffies) when the process was | ||
627 | * restored after an eviction | ||
628 | */ | ||
629 | unsigned long last_restore_timestamp; | ||
590 | }; | 630 | }; |
591 | 631 | ||
632 | #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ | ||
633 | extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); | ||
634 | extern struct srcu_struct kfd_processes_srcu; | ||
635 | |||
592 | /** | 636 | /** |
593 | * Ioctl function type. | 637 | * Ioctl function type. |
594 | * | 638 | * |
@@ -612,13 +656,13 @@ void kfd_process_destroy_wq(void); | |||
612 | struct kfd_process *kfd_create_process(struct file *filep); | 656 | struct kfd_process *kfd_create_process(struct file *filep); |
613 | struct kfd_process *kfd_get_process(const struct task_struct *); | 657 | struct kfd_process *kfd_get_process(const struct task_struct *); |
614 | struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); | 658 | struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); |
659 | struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); | ||
615 | void kfd_unref_process(struct kfd_process *p); | 660 | void kfd_unref_process(struct kfd_process *p); |
661 | void kfd_suspend_all_processes(void); | ||
662 | int kfd_resume_all_processes(void); | ||
616 | 663 | ||
617 | struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, | 664 | struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, |
618 | struct kfd_process *p); | 665 | struct kfd_process *p); |
619 | int kfd_bind_processes_to_device(struct kfd_dev *dev); | ||
620 | void kfd_unbind_processes_from_device(struct kfd_dev *dev); | ||
621 | void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid); | ||
622 | struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, | 666 | struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, |
623 | struct kfd_process *p); | 667 | struct kfd_process *p); |
624 | struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, | 668 | struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, |
@@ -705,8 +749,12 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, | |||
705 | struct kfd_dev *dev); | 749 | struct kfd_dev *dev); |
706 | struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, | 750 | struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, |
707 | struct kfd_dev *dev); | 751 | struct kfd_dev *dev); |
752 | struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, | ||
753 | struct kfd_dev *dev); | ||
708 | struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, | 754 | struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, |
709 | struct kfd_dev *dev); | 755 | struct kfd_dev *dev); |
756 | struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, | ||
757 | struct kfd_dev *dev); | ||
710 | struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); | 758 | struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); |
711 | void device_queue_manager_uninit(struct device_queue_manager *dqm); | 759 | void device_queue_manager_uninit(struct device_queue_manager *dqm); |
712 | struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, | 760 | struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, |
@@ -795,6 +843,8 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, | |||
795 | uint64_t *event_page_offset, uint32_t *event_slot_index); | 843 | uint64_t *event_page_offset, uint32_t *event_slot_index); |
796 | int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); | 844 | int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); |
797 | 845 | ||
846 | void kfd_flush_tlb(struct kfd_process_device *pdd); | ||
847 | |||
798 | int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); | 848 | int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); |
799 | 849 | ||
800 | /* Debugfs */ | 850 | /* Debugfs */ |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 4ff5f0fe6db8..18b2b86ad503 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c | |||
@@ -34,17 +34,18 @@ | |||
34 | struct mm_struct; | 34 | struct mm_struct; |
35 | 35 | ||
36 | #include "kfd_priv.h" | 36 | #include "kfd_priv.h" |
37 | #include "kfd_device_queue_manager.h" | ||
37 | #include "kfd_dbgmgr.h" | 38 | #include "kfd_dbgmgr.h" |
39 | #include "kfd_iommu.h" | ||
38 | 40 | ||
39 | /* | 41 | /* |
40 | * List of struct kfd_process (field kfd_process). | 42 | * List of struct kfd_process (field kfd_process). |
41 | * Unique/indexed by mm_struct* | 43 | * Unique/indexed by mm_struct* |
42 | */ | 44 | */ |
43 | #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ | 45 | DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); |
44 | static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); | ||
45 | static DEFINE_MUTEX(kfd_processes_mutex); | 46 | static DEFINE_MUTEX(kfd_processes_mutex); |
46 | 47 | ||
47 | DEFINE_STATIC_SRCU(kfd_processes_srcu); | 48 | DEFINE_SRCU(kfd_processes_srcu); |
48 | 49 | ||
49 | static struct workqueue_struct *kfd_process_wq; | 50 | static struct workqueue_struct *kfd_process_wq; |
50 | 51 | ||
@@ -54,6 +55,9 @@ static struct kfd_process *create_process(const struct task_struct *thread, | |||
54 | struct file *filep); | 55 | struct file *filep); |
55 | static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep); | 56 | static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep); |
56 | 57 | ||
58 | static void evict_process_worker(struct work_struct *work); | ||
59 | static void restore_process_worker(struct work_struct *work); | ||
60 | |||
57 | 61 | ||
58 | void kfd_process_create_wq(void) | 62 | void kfd_process_create_wq(void) |
59 | { | 63 | { |
@@ -154,6 +158,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) | |||
154 | pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n", | 158 | pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n", |
155 | pdd->dev->id, p->pasid); | 159 | pdd->dev->id, p->pasid); |
156 | 160 | ||
161 | if (pdd->vm) | ||
162 | pdd->dev->kfd2kgd->destroy_process_vm( | ||
163 | pdd->dev->kgd, pdd->vm); | ||
164 | |||
157 | list_del(&pdd->per_device_list); | 165 | list_del(&pdd->per_device_list); |
158 | 166 | ||
159 | if (pdd->qpd.cwsr_kaddr) | 167 | if (pdd->qpd.cwsr_kaddr) |
@@ -173,16 +181,11 @@ static void kfd_process_wq_release(struct work_struct *work) | |||
173 | { | 181 | { |
174 | struct kfd_process *p = container_of(work, struct kfd_process, | 182 | struct kfd_process *p = container_of(work, struct kfd_process, |
175 | release_work); | 183 | release_work); |
176 | struct kfd_process_device *pdd; | ||
177 | 184 | ||
178 | pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid); | 185 | kfd_iommu_unbind_process(p); |
179 | |||
180 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) { | ||
181 | if (pdd->bound == PDD_BOUND) | ||
182 | amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); | ||
183 | } | ||
184 | 186 | ||
185 | kfd_process_destroy_pdds(p); | 187 | kfd_process_destroy_pdds(p); |
188 | dma_fence_put(p->ef); | ||
186 | 189 | ||
187 | kfd_event_free_process(p); | 190 | kfd_event_free_process(p); |
188 | 191 | ||
@@ -230,6 +233,9 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, | |||
230 | mutex_unlock(&kfd_processes_mutex); | 233 | mutex_unlock(&kfd_processes_mutex); |
231 | synchronize_srcu(&kfd_processes_srcu); | 234 | synchronize_srcu(&kfd_processes_srcu); |
232 | 235 | ||
236 | cancel_delayed_work_sync(&p->eviction_work); | ||
237 | cancel_delayed_work_sync(&p->restore_work); | ||
238 | |||
233 | mutex_lock(&p->mutex); | 239 | mutex_lock(&p->mutex); |
234 | 240 | ||
235 | /* Iterate over all process device data structures and if the | 241 | /* Iterate over all process device data structures and if the |
@@ -351,6 +357,10 @@ static struct kfd_process *create_process(const struct task_struct *thread, | |||
351 | if (err != 0) | 357 | if (err != 0) |
352 | goto err_init_apertures; | 358 | goto err_init_apertures; |
353 | 359 | ||
360 | INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); | ||
361 | INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); | ||
362 | process->last_restore_timestamp = get_jiffies_64(); | ||
363 | |||
354 | err = kfd_process_init_cwsr(process, filep); | 364 | err = kfd_process_init_cwsr(process, filep); |
355 | if (err) | 365 | if (err) |
356 | goto err_init_cwsr; | 366 | goto err_init_cwsr; |
@@ -402,12 +412,24 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, | |||
402 | INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); | 412 | INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); |
403 | pdd->qpd.dqm = dev->dqm; | 413 | pdd->qpd.dqm = dev->dqm; |
404 | pdd->qpd.pqm = &p->pqm; | 414 | pdd->qpd.pqm = &p->pqm; |
415 | pdd->qpd.evicted = 0; | ||
405 | pdd->process = p; | 416 | pdd->process = p; |
406 | pdd->bound = PDD_UNBOUND; | 417 | pdd->bound = PDD_UNBOUND; |
407 | pdd->already_dequeued = false; | 418 | pdd->already_dequeued = false; |
408 | list_add(&pdd->per_device_list, &p->per_device_data); | 419 | list_add(&pdd->per_device_list, &p->per_device_data); |
409 | 420 | ||
421 | /* Create the GPUVM context for this specific device */ | ||
422 | if (dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm, | ||
423 | &p->kgd_process_info, &p->ef)) { | ||
424 | pr_err("Failed to create process VM object\n"); | ||
425 | goto err_create_pdd; | ||
426 | } | ||
410 | return pdd; | 427 | return pdd; |
428 | |||
429 | err_create_pdd: | ||
430 | list_del(&pdd->per_device_list); | ||
431 | kfree(pdd); | ||
432 | return NULL; | ||
411 | } | 433 | } |
412 | 434 | ||
413 | /* | 435 | /* |
@@ -429,174 +451,256 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, | |||
429 | return ERR_PTR(-ENOMEM); | 451 | return ERR_PTR(-ENOMEM); |
430 | } | 452 | } |
431 | 453 | ||
432 | if (pdd->bound == PDD_BOUND) { | 454 | err = kfd_iommu_bind_process_to_device(pdd); |
433 | return pdd; | 455 | if (err) |
434 | } else if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) { | ||
435 | pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n"); | ||
436 | return ERR_PTR(-EINVAL); | ||
437 | } | ||
438 | |||
439 | err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); | ||
440 | if (err < 0) | ||
441 | return ERR_PTR(err); | 456 | return ERR_PTR(err); |
442 | 457 | ||
443 | pdd->bound = PDD_BOUND; | ||
444 | |||
445 | return pdd; | 458 | return pdd; |
446 | } | 459 | } |
447 | 460 | ||
448 | /* | 461 | struct kfd_process_device *kfd_get_first_process_device_data( |
449 | * Bind processes do the device that have been temporarily unbound | 462 | struct kfd_process *p) |
450 | * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device. | ||
451 | */ | ||
452 | int kfd_bind_processes_to_device(struct kfd_dev *dev) | ||
453 | { | 463 | { |
454 | struct kfd_process_device *pdd; | 464 | return list_first_entry(&p->per_device_data, |
455 | struct kfd_process *p; | 465 | struct kfd_process_device, |
466 | per_device_list); | ||
467 | } | ||
468 | |||
469 | struct kfd_process_device *kfd_get_next_process_device_data( | ||
470 | struct kfd_process *p, | ||
471 | struct kfd_process_device *pdd) | ||
472 | { | ||
473 | if (list_is_last(&pdd->per_device_list, &p->per_device_data)) | ||
474 | return NULL; | ||
475 | return list_next_entry(pdd, per_device_list); | ||
476 | } | ||
477 | |||
478 | bool kfd_has_process_device_data(struct kfd_process *p) | ||
479 | { | ||
480 | return !(list_empty(&p->per_device_data)); | ||
481 | } | ||
482 | |||
483 | /* This increments the process->ref counter. */ | ||
484 | struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) | ||
485 | { | ||
486 | struct kfd_process *p, *ret_p = NULL; | ||
456 | unsigned int temp; | 487 | unsigned int temp; |
457 | int err = 0; | ||
458 | 488 | ||
459 | int idx = srcu_read_lock(&kfd_processes_srcu); | 489 | int idx = srcu_read_lock(&kfd_processes_srcu); |
460 | 490 | ||
461 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { | 491 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { |
462 | mutex_lock(&p->mutex); | 492 | if (p->pasid == pasid) { |
463 | pdd = kfd_get_process_device_data(dev, p); | 493 | kref_get(&p->ref); |
464 | 494 | ret_p = p; | |
465 | if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) { | ||
466 | mutex_unlock(&p->mutex); | ||
467 | continue; | ||
468 | } | ||
469 | |||
470 | err = amd_iommu_bind_pasid(dev->pdev, p->pasid, | ||
471 | p->lead_thread); | ||
472 | if (err < 0) { | ||
473 | pr_err("Unexpected pasid %d binding failure\n", | ||
474 | p->pasid); | ||
475 | mutex_unlock(&p->mutex); | ||
476 | break; | 495 | break; |
477 | } | 496 | } |
478 | |||
479 | pdd->bound = PDD_BOUND; | ||
480 | mutex_unlock(&p->mutex); | ||
481 | } | 497 | } |
482 | 498 | ||
483 | srcu_read_unlock(&kfd_processes_srcu, idx); | 499 | srcu_read_unlock(&kfd_processes_srcu, idx); |
484 | 500 | ||
485 | return err; | 501 | return ret_p; |
486 | } | 502 | } |
487 | 503 | ||
488 | /* | 504 | /* This increments the process->ref counter. */ |
489 | * Mark currently bound processes as PDD_BOUND_SUSPENDED. These | 505 | struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) |
490 | * processes will be restored to PDD_BOUND state in | ||
491 | * kfd_bind_processes_to_device. | ||
492 | */ | ||
493 | void kfd_unbind_processes_from_device(struct kfd_dev *dev) | ||
494 | { | 506 | { |
495 | struct kfd_process_device *pdd; | ||
496 | struct kfd_process *p; | 507 | struct kfd_process *p; |
497 | unsigned int temp; | ||
498 | 508 | ||
499 | int idx = srcu_read_lock(&kfd_processes_srcu); | 509 | int idx = srcu_read_lock(&kfd_processes_srcu); |
500 | 510 | ||
501 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { | 511 | p = find_process_by_mm(mm); |
502 | mutex_lock(&p->mutex); | 512 | if (p) |
503 | pdd = kfd_get_process_device_data(dev, p); | 513 | kref_get(&p->ref); |
504 | |||
505 | if (WARN_ON(!pdd)) { | ||
506 | mutex_unlock(&p->mutex); | ||
507 | continue; | ||
508 | } | ||
509 | |||
510 | if (pdd->bound == PDD_BOUND) | ||
511 | pdd->bound = PDD_BOUND_SUSPENDED; | ||
512 | mutex_unlock(&p->mutex); | ||
513 | } | ||
514 | 514 | ||
515 | srcu_read_unlock(&kfd_processes_srcu, idx); | 515 | srcu_read_unlock(&kfd_processes_srcu, idx); |
516 | |||
517 | return p; | ||
516 | } | 518 | } |
517 | 519 | ||
518 | void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) | 520 | /* process_evict_queues - Evict all user queues of a process |
521 | * | ||
522 | * Eviction is reference-counted per process-device. This means multiple | ||
523 | * evictions from different sources can be nested safely. | ||
524 | */ | ||
525 | static int process_evict_queues(struct kfd_process *p) | ||
519 | { | 526 | { |
520 | struct kfd_process *p; | ||
521 | struct kfd_process_device *pdd; | 527 | struct kfd_process_device *pdd; |
528 | int r = 0; | ||
529 | unsigned int n_evicted = 0; | ||
522 | 530 | ||
523 | /* | 531 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) { |
524 | * Look for the process that matches the pasid. If there is no such | 532 | r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm, |
525 | * process, we either released it in amdkfd's own notifier, or there | 533 | &pdd->qpd); |
526 | * is a bug. Unfortunately, there is no way to tell... | 534 | if (r) { |
527 | */ | 535 | pr_err("Failed to evict process queues\n"); |
528 | p = kfd_lookup_process_by_pasid(pasid); | 536 | goto fail; |
529 | if (!p) | 537 | } |
530 | return; | 538 | n_evicted++; |
539 | } | ||
531 | 540 | ||
532 | pr_debug("Unbinding process %d from IOMMU\n", pasid); | 541 | return r; |
533 | 542 | ||
534 | mutex_lock(kfd_get_dbgmgr_mutex()); | 543 | fail: |
544 | /* To keep state consistent, roll back partial eviction by | ||
545 | * restoring queues | ||
546 | */ | ||
547 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) { | ||
548 | if (n_evicted == 0) | ||
549 | break; | ||
550 | if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, | ||
551 | &pdd->qpd)) | ||
552 | pr_err("Failed to restore queues\n"); | ||
535 | 553 | ||
536 | if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { | 554 | n_evicted--; |
537 | if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { | ||
538 | kfd_dbgmgr_destroy(dev->dbgmgr); | ||
539 | dev->dbgmgr = NULL; | ||
540 | } | ||
541 | } | 555 | } |
542 | 556 | ||
543 | mutex_unlock(kfd_get_dbgmgr_mutex()); | 557 | return r; |
544 | 558 | } | |
545 | mutex_lock(&p->mutex); | ||
546 | 559 | ||
547 | pdd = kfd_get_process_device_data(dev, p); | 560 | /* process_restore_queues - Restore all user queues of a process */ |
548 | if (pdd) | 561 | static int process_restore_queues(struct kfd_process *p) |
549 | /* For GPU relying on IOMMU, we need to dequeue here | 562 | { |
550 | * when PASID is still bound. | 563 | struct kfd_process_device *pdd; |
551 | */ | 564 | int r, ret = 0; |
552 | kfd_process_dequeue_from_device(pdd); | ||
553 | 565 | ||
554 | mutex_unlock(&p->mutex); | 566 | list_for_each_entry(pdd, &p->per_device_data, per_device_list) { |
567 | r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, | ||
568 | &pdd->qpd); | ||
569 | if (r) { | ||
570 | pr_err("Failed to restore process queues\n"); | ||
571 | if (!ret) | ||
572 | ret = r; | ||
573 | } | ||
574 | } | ||
555 | 575 | ||
556 | kfd_unref_process(p); | 576 | return ret; |
557 | } | 577 | } |
558 | 578 | ||
559 | struct kfd_process_device *kfd_get_first_process_device_data( | 579 | static void evict_process_worker(struct work_struct *work) |
560 | struct kfd_process *p) | ||
561 | { | 580 | { |
562 | return list_first_entry(&p->per_device_data, | 581 | int ret; |
563 | struct kfd_process_device, | 582 | struct kfd_process *p; |
564 | per_device_list); | 583 | struct delayed_work *dwork; |
584 | |||
585 | dwork = to_delayed_work(work); | ||
586 | |||
587 | /* Process termination destroys this worker thread. So during the | ||
588 | * lifetime of this thread, kfd_process p will be valid | ||
589 | */ | ||
590 | p = container_of(dwork, struct kfd_process, eviction_work); | ||
591 | WARN_ONCE(p->last_eviction_seqno != p->ef->seqno, | ||
592 | "Eviction fence mismatch\n"); | ||
593 | |||
594 | /* Narrow window of overlap between restore and evict work | ||
595 | * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos | ||
596 | * unreserves KFD BOs, it is possible to evicted again. But | ||
597 | * restore has few more steps of finish. So lets wait for any | ||
598 | * previous restore work to complete | ||
599 | */ | ||
600 | flush_delayed_work(&p->restore_work); | ||
601 | |||
602 | pr_debug("Started evicting pasid %d\n", p->pasid); | ||
603 | ret = process_evict_queues(p); | ||
604 | if (!ret) { | ||
605 | dma_fence_signal(p->ef); | ||
606 | dma_fence_put(p->ef); | ||
607 | p->ef = NULL; | ||
608 | schedule_delayed_work(&p->restore_work, | ||
609 | msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); | ||
610 | |||
611 | pr_debug("Finished evicting pasid %d\n", p->pasid); | ||
612 | } else | ||
613 | pr_err("Failed to evict queues of pasid %d\n", p->pasid); | ||
565 | } | 614 | } |
566 | 615 | ||
567 | struct kfd_process_device *kfd_get_next_process_device_data( | 616 | static void restore_process_worker(struct work_struct *work) |
568 | struct kfd_process *p, | ||
569 | struct kfd_process_device *pdd) | ||
570 | { | 617 | { |
571 | if (list_is_last(&pdd->per_device_list, &p->per_device_data)) | 618 | struct delayed_work *dwork; |
572 | return NULL; | 619 | struct kfd_process *p; |
573 | return list_next_entry(pdd, per_device_list); | 620 | struct kfd_process_device *pdd; |
621 | int ret = 0; | ||
622 | |||
623 | dwork = to_delayed_work(work); | ||
624 | |||
625 | /* Process termination destroys this worker thread. So during the | ||
626 | * lifetime of this thread, kfd_process p will be valid | ||
627 | */ | ||
628 | p = container_of(dwork, struct kfd_process, restore_work); | ||
629 | |||
630 | /* Call restore_process_bos on the first KGD device. This function | ||
631 | * takes care of restoring the whole process including other devices. | ||
632 | * Restore can fail if enough memory is not available. If so, | ||
633 | * reschedule again. | ||
634 | */ | ||
635 | pdd = list_first_entry(&p->per_device_data, | ||
636 | struct kfd_process_device, | ||
637 | per_device_list); | ||
638 | |||
639 | pr_debug("Started restoring pasid %d\n", p->pasid); | ||
640 | |||
641 | /* Setting last_restore_timestamp before successful restoration. | ||
642 | * Otherwise this would have to be set by KGD (restore_process_bos) | ||
643 | * before KFD BOs are unreserved. If not, the process can be evicted | ||
644 | * again before the timestamp is set. | ||
645 | * If restore fails, the timestamp will be set again in the next | ||
646 | * attempt. This would mean that the minimum GPU quanta would be | ||
647 | * PROCESS_ACTIVE_TIME_MS - (time to execute the following two | ||
648 | * functions) | ||
649 | */ | ||
650 | |||
651 | p->last_restore_timestamp = get_jiffies_64(); | ||
652 | ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info, | ||
653 | &p->ef); | ||
654 | if (ret) { | ||
655 | pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n", | ||
656 | p->pasid, PROCESS_BACK_OFF_TIME_MS); | ||
657 | ret = schedule_delayed_work(&p->restore_work, | ||
658 | msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS)); | ||
659 | WARN(!ret, "reschedule restore work failed\n"); | ||
660 | return; | ||
661 | } | ||
662 | |||
663 | ret = process_restore_queues(p); | ||
664 | if (!ret) | ||
665 | pr_debug("Finished restoring pasid %d\n", p->pasid); | ||
666 | else | ||
667 | pr_err("Failed to restore queues of pasid %d\n", p->pasid); | ||
574 | } | 668 | } |
575 | 669 | ||
576 | bool kfd_has_process_device_data(struct kfd_process *p) | 670 | void kfd_suspend_all_processes(void) |
577 | { | 671 | { |
578 | return !(list_empty(&p->per_device_data)); | 672 | struct kfd_process *p; |
673 | unsigned int temp; | ||
674 | int idx = srcu_read_lock(&kfd_processes_srcu); | ||
675 | |||
676 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { | ||
677 | cancel_delayed_work_sync(&p->eviction_work); | ||
678 | cancel_delayed_work_sync(&p->restore_work); | ||
679 | |||
680 | if (process_evict_queues(p)) | ||
681 | pr_err("Failed to suspend process %d\n", p->pasid); | ||
682 | dma_fence_signal(p->ef); | ||
683 | dma_fence_put(p->ef); | ||
684 | p->ef = NULL; | ||
685 | } | ||
686 | srcu_read_unlock(&kfd_processes_srcu, idx); | ||
579 | } | 687 | } |
580 | 688 | ||
581 | /* This increments the process->ref counter. */ | 689 | int kfd_resume_all_processes(void) |
582 | struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) | ||
583 | { | 690 | { |
584 | struct kfd_process *p, *ret_p = NULL; | 691 | struct kfd_process *p; |
585 | unsigned int temp; | 692 | unsigned int temp; |
586 | 693 | int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu); | |
587 | int idx = srcu_read_lock(&kfd_processes_srcu); | ||
588 | 694 | ||
589 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { | 695 | hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { |
590 | if (p->pasid == pasid) { | 696 | if (!schedule_delayed_work(&p->restore_work, 0)) { |
591 | kref_get(&p->ref); | 697 | pr_err("Restore process %d failed during resume\n", |
592 | ret_p = p; | 698 | p->pasid); |
593 | break; | 699 | ret = -EFAULT; |
594 | } | 700 | } |
595 | } | 701 | } |
596 | |||
597 | srcu_read_unlock(&kfd_processes_srcu, idx); | 702 | srcu_read_unlock(&kfd_processes_srcu, idx); |
598 | 703 | return ret; | |
599 | return ret_p; | ||
600 | } | 704 | } |
601 | 705 | ||
602 | int kfd_reserved_mem_mmap(struct kfd_process *process, | 706 | int kfd_reserved_mem_mmap(struct kfd_process *process, |
@@ -633,6 +737,22 @@ int kfd_reserved_mem_mmap(struct kfd_process *process, | |||
633 | KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); | 737 | KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); |
634 | } | 738 | } |
635 | 739 | ||
740 | void kfd_flush_tlb(struct kfd_process_device *pdd) | ||
741 | { | ||
742 | struct kfd_dev *dev = pdd->dev; | ||
743 | const struct kfd2kgd_calls *f2g = dev->kfd2kgd; | ||
744 | |||
745 | if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { | ||
746 | /* Nothing to flush until a VMID is assigned, which | ||
747 | * only happens when the first queue is created. | ||
748 | */ | ||
749 | if (pdd->qpd.vmid) | ||
750 | f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid); | ||
751 | } else { | ||
752 | f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid); | ||
753 | } | ||
754 | } | ||
755 | |||
636 | #if defined(CONFIG_DEBUG_FS) | 756 | #if defined(CONFIG_DEBUG_FS) |
637 | 757 | ||
638 | int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data) | 758 | int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data) |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 876380632668..7817e327ea6d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | |||
@@ -208,7 +208,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, | |||
208 | 208 | ||
209 | case KFD_QUEUE_TYPE_COMPUTE: | 209 | case KFD_QUEUE_TYPE_COMPUTE: |
210 | /* check if there is over subscription */ | 210 | /* check if there is over subscription */ |
211 | if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && | 211 | if ((dev->dqm->sched_policy == |
212 | KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && | ||
212 | ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || | 213 | ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || |
213 | (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { | 214 | (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { |
214 | pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); | 215 | pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index c6a76090a725..250615535563 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include "kfd_crat.h" | 35 | #include "kfd_crat.h" |
36 | #include "kfd_topology.h" | 36 | #include "kfd_topology.h" |
37 | #include "kfd_device_queue_manager.h" | 37 | #include "kfd_device_queue_manager.h" |
38 | #include "kfd_iommu.h" | ||
38 | 39 | ||
39 | /* topology_device_list - Master list of all topology devices */ | 40 | /* topology_device_list - Master list of all topology devices */ |
40 | static struct list_head topology_device_list; | 41 | static struct list_head topology_device_list; |
@@ -677,7 +678,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, | |||
677 | } | 678 | } |
678 | 679 | ||
679 | /* All hardware blocks have the same number of attributes. */ | 680 | /* All hardware blocks have the same number of attributes. */ |
680 | num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr); | 681 | num_attrs = ARRAY_SIZE(perf_attr_iommu); |
681 | list_for_each_entry(perf, &dev->perf_props, list) { | 682 | list_for_each_entry(perf, &dev->perf_props, list) { |
682 | perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) | 683 | perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) |
683 | * num_attrs + sizeof(struct attribute_group), | 684 | * num_attrs + sizeof(struct attribute_group), |
@@ -875,19 +876,8 @@ static void find_system_memory(const struct dmi_header *dm, | |||
875 | */ | 876 | */ |
876 | static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev) | 877 | static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev) |
877 | { | 878 | { |
878 | struct kfd_perf_properties *props; | 879 | /* These are the only counters supported so far */ |
879 | 880 | return kfd_iommu_add_perf_counters(kdev); | |
880 | if (amd_iommu_pc_supported()) { | ||
881 | props = kfd_alloc_struct(props); | ||
882 | if (!props) | ||
883 | return -ENOMEM; | ||
884 | strcpy(props->block_name, "iommu"); | ||
885 | props->max_concurrent = amd_iommu_pc_get_max_banks(0) * | ||
886 | amd_iommu_pc_get_max_counters(0); /* assume one iommu */ | ||
887 | list_add_tail(&props->list, &kdev->perf_props); | ||
888 | } | ||
889 | |||
890 | return 0; | ||
891 | } | 881 | } |
892 | 882 | ||
893 | /* kfd_add_non_crat_information - Add information that is not currently | 883 | /* kfd_add_non_crat_information - Add information that is not currently |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 53fca1f45401..c0be2be6dca5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h | |||
@@ -25,7 +25,7 @@ | |||
25 | 25 | ||
26 | #include <linux/types.h> | 26 | #include <linux/types.h> |
27 | #include <linux/list.h> | 27 | #include <linux/list.h> |
28 | #include "kfd_priv.h" | 28 | #include "kfd_crat.h" |
29 | 29 | ||
30 | #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128 | 30 | #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128 |
31 | 31 | ||
@@ -183,8 +183,4 @@ struct kfd_topology_device *kfd_create_topology_device( | |||
183 | struct list_head *device_list); | 183 | struct list_head *device_list); |
184 | void kfd_release_topology_device_list(struct list_head *device_list); | 184 | void kfd_release_topology_device_list(struct list_head *device_list); |
185 | 185 | ||
186 | extern bool amd_iommu_pc_supported(void); | ||
187 | extern u8 amd_iommu_pc_get_max_banks(u16 devid); | ||
188 | extern u8 amd_iommu_pc_get_max_counters(u16 devid); | ||
189 | |||
190 | #endif /* __KFD_TOPOLOGY_H__ */ | 186 | #endif /* __KFD_TOPOLOGY_H__ */ |
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index a6752bd0c871..1e5c22ceb256 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h | |||
@@ -30,6 +30,7 @@ | |||
30 | 30 | ||
31 | #include <linux/types.h> | 31 | #include <linux/types.h> |
32 | #include <linux/bitmap.h> | 32 | #include <linux/bitmap.h> |
33 | #include <linux/dma-fence.h> | ||
33 | 34 | ||
34 | struct pci_dev; | 35 | struct pci_dev; |
35 | 36 | ||
@@ -107,6 +108,12 @@ struct kgd2kfd_shared_resources { | |||
107 | 108 | ||
108 | /* Number of bytes at start of aperture reserved for KGD. */ | 109 | /* Number of bytes at start of aperture reserved for KGD. */ |
109 | size_t doorbell_start_offset; | 110 | size_t doorbell_start_offset; |
111 | |||
112 | /* GPUVM address space size in bytes */ | ||
113 | uint64_t gpuvm_size; | ||
114 | |||
115 | /* Minor device number of the render node */ | ||
116 | int drm_render_minor; | ||
110 | }; | 117 | }; |
111 | 118 | ||
112 | struct tile_config { | 119 | struct tile_config { |
@@ -120,6 +127,25 @@ struct tile_config { | |||
120 | uint32_t num_ranks; | 127 | uint32_t num_ranks; |
121 | }; | 128 | }; |
122 | 129 | ||
130 | |||
131 | /* | ||
132 | * Allocation flag domains | ||
133 | */ | ||
134 | #define ALLOC_MEM_FLAGS_VRAM (1 << 0) | ||
135 | #define ALLOC_MEM_FLAGS_GTT (1 << 1) | ||
136 | #define ALLOC_MEM_FLAGS_USERPTR (1 << 2) /* TODO */ | ||
137 | #define ALLOC_MEM_FLAGS_DOORBELL (1 << 3) /* TODO */ | ||
138 | |||
139 | /* | ||
140 | * Allocation flags attributes/access options. | ||
141 | */ | ||
142 | #define ALLOC_MEM_FLAGS_WRITABLE (1 << 31) | ||
143 | #define ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) | ||
144 | #define ALLOC_MEM_FLAGS_PUBLIC (1 << 29) | ||
145 | #define ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) /* TODO */ | ||
146 | #define ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27) | ||
147 | #define ALLOC_MEM_FLAGS_COHERENT (1 << 26) /* For GFXv9 or later */ | ||
148 | |||
123 | /** | 149 | /** |
124 | * struct kfd2kgd_calls | 150 | * struct kfd2kgd_calls |
125 | * | 151 | * |
@@ -179,6 +205,45 @@ struct tile_config { | |||
179 | * | 205 | * |
180 | * @get_vram_usage: Returns current VRAM usage | 206 | * @get_vram_usage: Returns current VRAM usage |
181 | * | 207 | * |
208 | * @create_process_vm: Create a VM address space for a given process and GPU | ||
209 | * | ||
210 | * @destroy_process_vm: Destroy a VM | ||
211 | * | ||
212 | * @get_process_page_dir: Get physical address of a VM page directory | ||
213 | * | ||
214 | * @set_vm_context_page_table_base: Program page table base for a VMID | ||
215 | * | ||
216 | * @alloc_memory_of_gpu: Allocate GPUVM memory | ||
217 | * | ||
218 | * @free_memory_of_gpu: Free GPUVM memory | ||
219 | * | ||
220 | * @map_memory_to_gpu: Map GPUVM memory into a specific VM address | ||
221 | * space. Allocates and updates page tables and page directories as | ||
222 | * needed. This function may return before all page table updates have | ||
223 | * completed. This allows multiple map operations (on multiple GPUs) | ||
224 | * to happen concurrently. Use sync_memory to synchronize with all | ||
225 | * pending updates. | ||
226 | * | ||
227 | * @unmap_memor_to_gpu: Unmap GPUVM memory from a specific VM address space | ||
228 | * | ||
229 | * @sync_memory: Wait for pending page table updates to complete | ||
230 | * | ||
231 | * @map_gtt_bo_to_kernel: Map a GTT BO for kernel access | ||
232 | * Pins the BO, maps it to kernel address space. Such BOs are never evicted. | ||
233 | * The kernel virtual address remains valid until the BO is freed. | ||
234 | * | ||
235 | * @restore_process_bos: Restore all BOs that belong to the | ||
236 | * process. This is intended for restoring memory mappings after a TTM | ||
237 | * eviction. | ||
238 | * | ||
239 | * @invalidate_tlbs: Invalidate TLBs for a specific PASID | ||
240 | * | ||
241 | * @invalidate_tlbs_vmid: Invalidate TLBs for a specific VMID | ||
242 | * | ||
243 | * @submit_ib: Submits an IB to the engine specified by inserting the | ||
244 | * IB to the corresponding ring (ring type). The IB is executed with the | ||
245 | * specified VMID in a user mode context. | ||
246 | * | ||
182 | * This structure contains function pointers to services that the kgd driver | 247 | * This structure contains function pointers to services that the kgd driver |
183 | * provides to amdkfd driver. | 248 | * provides to amdkfd driver. |
184 | * | 249 | * |
@@ -258,8 +323,6 @@ struct kfd2kgd_calls { | |||
258 | uint16_t (*get_atc_vmid_pasid_mapping_pasid)( | 323 | uint16_t (*get_atc_vmid_pasid_mapping_pasid)( |
259 | struct kgd_dev *kgd, | 324 | struct kgd_dev *kgd, |
260 | uint8_t vmid); | 325 | uint8_t vmid); |
261 | void (*write_vmid_invalidate_request)(struct kgd_dev *kgd, | ||
262 | uint8_t vmid); | ||
263 | 326 | ||
264 | uint16_t (*get_fw_version)(struct kgd_dev *kgd, | 327 | uint16_t (*get_fw_version)(struct kgd_dev *kgd, |
265 | enum kgd_engine_type type); | 328 | enum kgd_engine_type type); |
@@ -270,6 +333,33 @@ struct kfd2kgd_calls { | |||
270 | void (*get_cu_info)(struct kgd_dev *kgd, | 333 | void (*get_cu_info)(struct kgd_dev *kgd, |
271 | struct kfd_cu_info *cu_info); | 334 | struct kfd_cu_info *cu_info); |
272 | uint64_t (*get_vram_usage)(struct kgd_dev *kgd); | 335 | uint64_t (*get_vram_usage)(struct kgd_dev *kgd); |
336 | |||
337 | int (*create_process_vm)(struct kgd_dev *kgd, void **vm, | ||
338 | void **process_info, struct dma_fence **ef); | ||
339 | void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm); | ||
340 | uint32_t (*get_process_page_dir)(void *vm); | ||
341 | void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, | ||
342 | uint32_t vmid, uint32_t page_table_base); | ||
343 | int (*alloc_memory_of_gpu)(struct kgd_dev *kgd, uint64_t va, | ||
344 | uint64_t size, void *vm, | ||
345 | struct kgd_mem **mem, uint64_t *offset, | ||
346 | uint32_t flags); | ||
347 | int (*free_memory_of_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem); | ||
348 | int (*map_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, | ||
349 | void *vm); | ||
350 | int (*unmap_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, | ||
351 | void *vm); | ||
352 | int (*sync_memory)(struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); | ||
353 | int (*map_gtt_bo_to_kernel)(struct kgd_dev *kgd, struct kgd_mem *mem, | ||
354 | void **kptr, uint64_t *size); | ||
355 | int (*restore_process_bos)(void *process_info, struct dma_fence **ef); | ||
356 | |||
357 | int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid); | ||
358 | int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid); | ||
359 | |||
360 | int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine, | ||
361 | uint32_t vmid, uint64_t gpu_addr, | ||
362 | uint32_t *ib_cmd, uint32_t ib_len); | ||
273 | }; | 363 | }; |
274 | 364 | ||
275 | /** | 365 | /** |
@@ -288,6 +378,9 @@ struct kfd2kgd_calls { | |||
288 | * | 378 | * |
289 | * @resume: Notifies amdkfd about a resume action done to a kgd device | 379 | * @resume: Notifies amdkfd about a resume action done to a kgd device |
290 | * | 380 | * |
381 | * @schedule_evict_and_restore_process: Schedules work queue that will prepare | ||
382 | * for safe eviction of KFD BOs that belong to the specified process. | ||
383 | * | ||
291 | * This structure contains function callback pointers so the kgd driver | 384 | * This structure contains function callback pointers so the kgd driver |
292 | * will notify to the amdkfd about certain status changes. | 385 | * will notify to the amdkfd about certain status changes. |
293 | * | 386 | * |
@@ -302,6 +395,8 @@ struct kgd2kfd_calls { | |||
302 | void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry); | 395 | void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry); |
303 | void (*suspend)(struct kfd_dev *kfd); | 396 | void (*suspend)(struct kfd_dev *kfd); |
304 | int (*resume)(struct kfd_dev *kfd); | 397 | int (*resume)(struct kfd_dev *kfd); |
398 | int (*schedule_evict_and_restore_process)(struct mm_struct *mm, | ||
399 | struct dma_fence *fence); | ||
305 | }; | 400 | }; |
306 | 401 | ||
307 | int kgd2kfd_init(unsigned interface_version, | 402 | int kgd2kfd_init(unsigned interface_version, |
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index f4cab5b3ba9a..111d73ba2d96 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h | |||
@@ -263,10 +263,10 @@ struct kfd_ioctl_get_tile_config_args { | |||
263 | }; | 263 | }; |
264 | 264 | ||
265 | struct kfd_ioctl_set_trap_handler_args { | 265 | struct kfd_ioctl_set_trap_handler_args { |
266 | uint64_t tba_addr; /* to KFD */ | 266 | __u64 tba_addr; /* to KFD */ |
267 | uint64_t tma_addr; /* to KFD */ | 267 | __u64 tma_addr; /* to KFD */ |
268 | uint32_t gpu_id; /* to KFD */ | 268 | __u32 gpu_id; /* to KFD */ |
269 | uint32_t pad; | 269 | __u32 pad; |
270 | }; | 270 | }; |
271 | 271 | ||
272 | #define AMDKFD_IOCTL_BASE 'K' | 272 | #define AMDKFD_IOCTL_BASE 'K' |