Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

 -rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 399
 1 file changed, 283 insertions, 116 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5afbc5e714d0..da55a78d7380 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -32,6 +32,7 @@
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 
 /*
  * GPUVM
@@ -75,7 +76,8 @@ struct amdgpu_pte_update_params {
         /* indirect buffer to fill with commands */
         struct amdgpu_ib *ib;
         /* Function which actually does the update */
-        void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
+        void (*func)(struct amdgpu_pte_update_params *params,
+                     struct amdgpu_bo *bo, uint64_t pe,
                      uint64_t addr, unsigned count, uint32_t incr,
                      uint64_t flags);
         /* The next two are used during VM update by CPU
@@ -257,6 +259,104 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 }
 
 /**
+ * amdgpu_vm_clear_bo - initially clear the PDs/PTs
+ *
+ * @adev: amdgpu_device pointer
+ * @bo: BO to clear
+ * @level: level this BO is at
+ *
+ * Root PD needs to be reserved when calling this.
+ */
+static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
+                              struct amdgpu_vm *vm, struct amdgpu_bo *bo,
+                              unsigned level, bool pte_support_ats)
+{
+        struct ttm_operation_ctx ctx = { true, false };
+        struct dma_fence *fence = NULL;
+        unsigned entries, ats_entries;
+        struct amdgpu_ring *ring;
+        struct amdgpu_job *job;
+        uint64_t addr;
+        int r;
+
+        addr = amdgpu_bo_gpu_offset(bo);
+        entries = amdgpu_bo_size(bo) / 8;
+
+        if (pte_support_ats) {
+                if (level == adev->vm_manager.root_level) {
+                        ats_entries = amdgpu_vm_level_shift(adev, level);
+                        ats_entries += AMDGPU_GPU_PAGE_SHIFT;
+                        ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
+                        ats_entries = min(ats_entries, entries);
+                        entries -= ats_entries;
+                } else {
+                        ats_entries = entries;
+                        entries = 0;
+                }
+        } else {
+                ats_entries = 0;
+        }
+
+        ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
+        r = reservation_object_reserve_shared(bo->tbo.resv);
+        if (r)
+                return r;
+
+        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+        if (r)
+                goto error;
+
+        r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+        if (r)
+                goto error;
+
+        if (ats_entries) {
+                uint64_t ats_value;
+
+                ats_value = AMDGPU_PTE_DEFAULT_ATC;
+                if (level != AMDGPU_VM_PTB)
+                        ats_value |= AMDGPU_PDE_PTE;
+
+                amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+                                      ats_entries, 0, ats_value);
+                addr += ats_entries * 8;
+        }
+
+        if (entries)
+                amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+                                      entries, 0, 0);
+
+        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+
+        WARN_ON(job->ibs[0].length_dw > 64);
+        r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
+                             AMDGPU_FENCE_OWNER_UNDEFINED, false);
+        if (r)
+                goto error_free;
+
+        r = amdgpu_job_submit(job, ring, &vm->entity,
+                              AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+        if (r)
+                goto error_free;
+
+        amdgpu_bo_fence(bo, fence, true);
+        dma_fence_put(fence);
+
+        if (bo->shadow)
+                return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
+                                          level, pte_support_ats);
+
+        return 0;
+
+error_free:
+        amdgpu_job_free(job);
+
+error:
+        return r;
+}
+
+/**
  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
  *
  * @adev: amdgpu_device pointer
@@ -270,13 +370,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
                                   struct amdgpu_vm *vm,
                                   struct amdgpu_vm_pt *parent,
                                   uint64_t saddr, uint64_t eaddr,
-                                  unsigned level)
+                                  unsigned level, bool ats)
 {
         unsigned shift = amdgpu_vm_level_shift(adev, level);
         unsigned pt_idx, from, to;
-        int r;
         u64 flags;
-        uint64_t init_value = 0;
+        int r;
 
         if (!parent->entries) {
                 unsigned num_entries = amdgpu_vm_num_entries(adev, level);
@@ -299,21 +398,13 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
         saddr = saddr & ((1 << shift) - 1);
         eaddr = eaddr & ((1 << shift) - 1);
 
-        flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-                AMDGPU_GEM_CREATE_VRAM_CLEARED;
+        flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
         if (vm->use_cpu_for_update)
                 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
         else
                 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
                           AMDGPU_GEM_CREATE_SHADOW);
 
-        if (vm->pte_support_ats) {
-                init_value = AMDGPU_PTE_DEFAULT_ATC;
-                if (level != AMDGPU_VM_PTB)
-                        init_value |= AMDGPU_PDE_PTE;
-
-        }
-
         /* walk over the address space and allocate the page tables */
         for (pt_idx = from; pt_idx <= to; ++pt_idx) {
                 struct reservation_object *resv = vm->root.base.bo->tbo.resv;
@@ -323,16 +414,23 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
                 if (!entry->base.bo) {
                         r = amdgpu_bo_create(adev,
                                              amdgpu_vm_bo_size(adev, level),
-                                             AMDGPU_GPU_PAGE_SIZE, true,
-                                             AMDGPU_GEM_DOMAIN_VRAM,
-                                             flags,
-                                             NULL, resv, init_value, &pt);
+                                             AMDGPU_GPU_PAGE_SIZE,
+                                             AMDGPU_GEM_DOMAIN_VRAM, flags,
+                                             ttm_bo_type_kernel, resv, &pt);
                         if (r)
                                 return r;
 
+                        r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
+                        if (r) {
+                                amdgpu_bo_unref(&pt->shadow);
+                                amdgpu_bo_unref(&pt);
+                                return r;
+                        }
+
                         if (vm->use_cpu_for_update) {
                                 r = amdgpu_bo_kmap(pt, NULL);
                                 if (r) {
+                                        amdgpu_bo_unref(&pt->shadow);
                                         amdgpu_bo_unref(&pt);
                                         return r;
                                 }
@@ -356,7 +454,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
                         uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
                                 ((1 << shift) - 1);
                         r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
-                                                   sub_eaddr, level);
+                                                   sub_eaddr, level, ats);
                         if (r)
                                 return r;
                 }
@@ -379,26 +477,29 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
                         struct amdgpu_vm *vm,
                         uint64_t saddr, uint64_t size)
 {
-        uint64_t last_pfn;
         uint64_t eaddr;
+        bool ats = false;
 
         /* validate the parameters */
         if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
                 return -EINVAL;
 
         eaddr = saddr + size - 1;
-        last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
-        if (last_pfn >= adev->vm_manager.max_pfn) {
-                dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
-                        last_pfn, adev->vm_manager.max_pfn);
-                return -EINVAL;
-        }
+
+        if (vm->pte_support_ats)
+                ats = saddr < AMDGPU_VA_HOLE_START;
 
         saddr /= AMDGPU_GPU_PAGE_SIZE;
         eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
+        if (eaddr >= adev->vm_manager.max_pfn) {
+                dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
+                        eaddr, adev->vm_manager.max_pfn);
+                return -EINVAL;
+        }
+
         return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
-                                      adev->vm_manager.root_level);
+                                      adev->vm_manager.root_level, ats);
 }
 
 /**
@@ -465,7 +566,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 
 static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
 {
-        return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
+        return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
 }
 
 /**
@@ -491,14 +592,24 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
                 id->oa_base != job->oa_base ||
                 id->oa_size != job->oa_size);
         bool vm_flush_needed = job->vm_needs_flush;
+        bool pasid_mapping_needed = id->pasid != job->pasid ||
+                !id->pasid_mapping ||
+                !dma_fence_is_signaled(id->pasid_mapping);
+        struct dma_fence *fence = NULL;
         unsigned patch_offset = 0;
         int r;
 
         if (amdgpu_vmid_had_gpu_reset(adev, id)) {
                 gds_switch_needed = true;
                 vm_flush_needed = true;
+                pasid_mapping_needed = true;
         }
 
+        gds_switch_needed &= !!ring->funcs->emit_gds_switch;
+        vm_flush_needed &= !!ring->funcs->emit_vm_flush;
+        pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
+                ring->funcs->emit_wreg;
+
         if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
                 return 0;
 
@@ -508,23 +619,36 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
         if (need_pipe_sync)
                 amdgpu_ring_emit_pipeline_sync(ring);
 
-        if (ring->funcs->emit_vm_flush && vm_flush_needed) {
-                struct dma_fence *fence;
-
+        if (vm_flush_needed) {
                 trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
                 amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
+        }
+
+        if (pasid_mapping_needed)
+                amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
 
+        if (vm_flush_needed || pasid_mapping_needed) {
                 r = amdgpu_fence_emit(ring, &fence);
                 if (r)
                         return r;
+        }
 
+        if (vm_flush_needed) {
                 mutex_lock(&id_mgr->lock);
                 dma_fence_put(id->last_flush);
-                id->last_flush = fence;
-                id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
+                id->last_flush = dma_fence_get(fence);
+                id->current_gpu_reset_count =
+                        atomic_read(&adev->gpu_reset_counter);
                 mutex_unlock(&id_mgr->lock);
         }
 
+        if (pasid_mapping_needed) {
+                id->pasid = job->pasid;
+                dma_fence_put(id->pasid_mapping);
+                id->pasid_mapping = dma_fence_get(fence);
+        }
+        dma_fence_put(fence);
+
         if (ring->funcs->emit_gds_switch && gds_switch_needed) {
                 id->gds_base = job->gds_base;
                 id->gds_size = job->gds_size;
@@ -578,6 +702,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * amdgpu_vm_do_set_ptes - helper to call the right asic function
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -588,10 +713,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * to setup the page table using the DMA.
  */
 static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
+                                  struct amdgpu_bo *bo,
                                   uint64_t pe, uint64_t addr,
                                   unsigned count, uint32_t incr,
                                   uint64_t flags)
 {
+        pe += amdgpu_bo_gpu_offset(bo);
         trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 
         if (count < 3) {
@@ -608,6 +735,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
  * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
@@ -617,13 +745,14 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
  * Traces the parameters and calls the DMA function to copy the PTEs.
  */
 static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
+                                   struct amdgpu_bo *bo,
                                    uint64_t pe, uint64_t addr,
                                    unsigned count, uint32_t incr,
                                    uint64_t flags)
 {
         uint64_t src = (params->src + (addr >> 12) * 8);
 
-
+        pe += amdgpu_bo_gpu_offset(bo);
         trace_amdgpu_vm_copy_ptes(pe, src, count);
 
         amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
@@ -657,6 +786,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
  * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: kmap addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -666,6 +796,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
  * Write count number of PT/PD entries directly.
  */
 static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
+                                   struct amdgpu_bo *bo,
                                    uint64_t pe, uint64_t addr,
                                    unsigned count, uint32_t incr,
                                    uint64_t flags)
@@ -673,14 +804,16 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
         unsigned int i;
         uint64_t value;
 
+        pe += (unsigned long)amdgpu_bo_kptr(bo);
+
         trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 
         for (i = 0; i < count; i++) {
                 value = params->pages_addr ?
                         amdgpu_vm_map_gart(params->pages_addr, addr) :
                         addr;
-                amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
+                amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
                                         i, value, flags);
                 addr += incr;
         }
 }
@@ -714,8 +847,7 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
                                  struct amdgpu_vm_pt *parent,
                                  struct amdgpu_vm_pt *entry)
 {
-        struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo;
-        uint64_t pd_addr, shadow_addr = 0;
+        struct amdgpu_bo *bo = parent->base.bo, *pbo;
         uint64_t pde, pt, flags;
         unsigned level;
 
@@ -723,29 +855,17 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
         if (entry->huge)
                 return;
 
-        if (vm->use_cpu_for_update) {
-                pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-        } else {
-                pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-                shadow = parent->base.bo->shadow;
-                if (shadow)
-                        shadow_addr = amdgpu_bo_gpu_offset(shadow);
-        }
-
-        for (level = 0, pbo = parent->base.bo->parent; pbo; ++level)
+        for (level = 0, pbo = bo->parent; pbo; ++level)
                 pbo = pbo->parent;
 
         level += params->adev->vm_manager.root_level;
-        pt = amdgpu_bo_gpu_offset(bo);
+        pt = amdgpu_bo_gpu_offset(entry->base.bo);
         flags = AMDGPU_PTE_VALID;
-        amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags);
-        if (shadow) {
-                pde = shadow_addr + (entry - parent->entries) * 8;
-                params->func(params, pde, pt, 1, 0, flags);
-        }
-
-        pde = pd_addr + (entry - parent->entries) * 8;
-        params->func(params, pde, pt, 1, 0, flags);
+        amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
+        pde = (entry - parent->entries) * 8;
+        if (bo->shadow)
+                params->func(params, bo->shadow, pde, pt, 1, 0, flags);
+        params->func(params, bo, pde, pt, 1, 0, flags);
 }
 
 /*
@@ -856,7 +976,7 @@ restart:
         if (vm->use_cpu_for_update) {
                 /* Flush HDP */
                 mb();
-                amdgpu_gart_flush_gpu_tlb(adev, 0);
+                amdgpu_asic_flush_hdp(adev, NULL);
         } else if (params.ib->length_dw == 0) {
                 amdgpu_job_free(job);
         } else {
@@ -870,11 +990,6 @@ restart:
                 amdgpu_ring_pad_ib(ring, params.ib);
                 amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
                                  AMDGPU_FENCE_OWNER_VM, false);
-                if (root->shadow)
-                        amdgpu_sync_resv(adev, &job->sync,
-                                         root->shadow->tbo.resv,
-                                         AMDGPU_FENCE_OWNER_VM, false);
-
                 WARN_ON(params.ib->length_dw > ndw);
                 r = amdgpu_job_submit(job, ring, &vm->entity,
                                       AMDGPU_FENCE_OWNER_VM, &fence);
@@ -946,7 +1061,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
                                         unsigned nptes, uint64_t dst,
                                         uint64_t flags)
 {
-        uint64_t pd_addr, pde;
+        uint64_t pde;
 
         /* In the case of a mixed PT the PDE must point to it*/
         if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
@@ -967,21 +1082,12 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
         }
 
         entry->huge = true;
-        amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0,
-                               &dst, &flags);
+        amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
 
-        if (p->func == amdgpu_vm_cpu_set_ptes) {
-                pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-        } else {
-                if (parent->base.bo->shadow) {
-                        pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
-                        pde = pd_addr + (entry - parent->entries) * 8;
-                        p->func(p, pde, dst, 1, 0, flags);
-                }
-                pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-        }
-        pde = pd_addr + (entry - parent->entries) * 8;
-        p->func(p, pde, dst, 1, 0, flags);
+        pde = (entry - parent->entries) * 8;
+        if (parent->base.bo->shadow)
+                p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
+        p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
 }
 
 /**
@@ -1007,7 +1113,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
         uint64_t addr, pe_start;
         struct amdgpu_bo *pt;
         unsigned nptes;
-        bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
 
         /* walk over the address space and update the page tables */
         for (addr = start; addr < end; addr += nptes,
@@ -1030,20 +1135,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
                         continue;
 
                 pt = entry->base.bo;
-                if (use_cpu_update) {
-                        pe_start = (unsigned long)amdgpu_bo_kptr(pt);
-                } else {
-                        if (pt->shadow) {
-                                pe_start = amdgpu_bo_gpu_offset(pt->shadow);
-                                pe_start += (addr & mask) * 8;
-                                params->func(params, pe_start, dst, nptes,
-                                             AMDGPU_GPU_PAGE_SIZE, flags);
-                        }
-                        pe_start = amdgpu_bo_gpu_offset(pt);
-                }
-
-                pe_start += (addr & mask) * 8;
-                params->func(params, pe_start, dst, nptes,
+                pe_start = (addr & mask) * 8;
+                if (pt->shadow)
+                        params->func(params, pt->shadow, pe_start, dst, nptes,
+                                     AMDGPU_GPU_PAGE_SIZE, flags);
+                params->func(params, pt, pe_start, dst, nptes,
                              AMDGPU_GPU_PAGE_SIZE, flags);
         }
 
@@ -1204,11 +1300,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
         } else {
                 /* set page commands needed */
-                ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
+                ndw += ncmds * 10;
 
                 /* extra commands for begin/end fragments */
-                ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw
-                        * adev->vm_manager.fragment_size;
+                ndw += 2 * 10 * adev->vm_manager.fragment_size;
 
                 params.func = amdgpu_vm_do_set_ptes;
         }
@@ -1457,7 +1552,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
         if (vm->use_cpu_for_update) {
                 /* Flush HDP */
                 mb();
-                amdgpu_gart_flush_gpu_tlb(adev, 0);
+                amdgpu_asic_flush_hdp(adev, NULL);
         }
 
         spin_lock(&vm->status_lock);
@@ -1485,7 +1580,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
 
         spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
         enable = !!atomic_read(&adev->vm_manager.num_prt_users);
-        adev->gart.gart_funcs->set_prt(adev, enable);
+        adev->gmc.gmc_funcs->set_prt(adev, enable);
         spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
 }
 
@@ -1494,7 +1589,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
  */
 static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
 {
-        if (!adev->gart.gart_funcs->set_prt)
+        if (!adev->gmc.gmc_funcs->set_prt)
                 return;
 
         if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
@@ -1529,7 +1624,7 @@ static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
 {
         struct amdgpu_prt_cb *cb;
 
-        if (!adev->gart.gart_funcs->set_prt)
+        if (!adev->gmc.gmc_funcs->set_prt)
                 return;
 
         cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
@@ -1623,16 +1718,16 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
                           struct dma_fence **fence)
 {
         struct amdgpu_bo_va_mapping *mapping;
+        uint64_t init_pte_value = 0;
         struct dma_fence *f = NULL;
         int r;
-        uint64_t init_pte_value = 0;
 
         while (!list_empty(&vm->freed)) {
                 mapping = list_first_entry(&vm->freed,
                         struct amdgpu_bo_va_mapping, list);
                 list_del(&mapping->list);
 
-                if (vm->pte_support_ats)
+                if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
                         init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
                 r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
@@ -2262,11 +2357,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 {
         const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
                                    AMDGPU_VM_PTE_COUNT(adev) * 8);
-        uint64_t init_pde_value = 0, flags;
         unsigned ring_instance;
         struct amdgpu_ring *ring;
         struct drm_sched_rq *rq;
         unsigned long size;
+        uint64_t flags;
         int r, i;
 
         vm->va = RB_ROOT_CACHED;
@@ -2295,33 +2390,27 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
                                             AMDGPU_VM_USE_CPU_FOR_COMPUTE);
 
-                if (adev->asic_type == CHIP_RAVEN) {
+                if (adev->asic_type == CHIP_RAVEN)
                         vm->pte_support_ats = true;
-                        init_pde_value = AMDGPU_PTE_DEFAULT_ATC
-                                        | AMDGPU_PDE_PTE;
-
-                }
-        } else
+        } else {
                 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
                                             AMDGPU_VM_USE_CPU_FOR_GFX);
+        }
         DRM_DEBUG_DRIVER("VM update mode is %s\n",
                          vm->use_cpu_for_update ? "CPU" : "SDMA");
         WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
                   "CPU update of VM recommended only for large BAR system\n");
         vm->last_update = NULL;
 
-        flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-                AMDGPU_GEM_CREATE_VRAM_CLEARED;
+        flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
         if (vm->use_cpu_for_update)
                 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
         else
-                flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-                          AMDGPU_GEM_CREATE_SHADOW);
+                flags |= AMDGPU_GEM_CREATE_SHADOW;
 
         size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
-        r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM,
-                             flags, NULL, NULL, init_pde_value,
-                             &vm->root.base.bo);
+        r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
+                             ttm_bo_type_kernel, NULL, &vm->root.base.bo);
         if (r)
                 goto error_free_sched_entity;
 
@@ -2329,6 +2418,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
         if (r)
                 goto error_free_root;
 
+        r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+                               adev->vm_manager.root_level,
+                               vm->pte_support_ats);
+        if (r)
+                goto error_unreserve;
+
         vm->root.base.vm = vm;
         list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
         list_add_tail(&vm->root.base.vm_status, &vm->evicted);
@@ -2352,6 +2447,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
         return 0;
 
+error_unreserve:
+        amdgpu_bo_unreserve(vm->root.base.bo);
+
 error_free_root:
         amdgpu_bo_unref(&vm->root.base.bo->shadow);
         amdgpu_bo_unref(&vm->root.base.bo);
@@ -2364,6 +2462,73 @@ error_free_sched_entity:
 }
 
 /**
+ * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
+ *
+ * This only works on GFX VMs that don't have any BOs added and no
+ * page tables allocated yet.
+ *
+ * Changes the following VM parameters:
+ * - use_cpu_for_update
+ * - pte_supports_ats
+ * - pasid (old PASID is released, because compute manages its own PASIDs)
+ *
+ * Reinitializes the page directory to reflect the changed ATS
+ * setting. May leave behind an unused shadow BO for the page
+ * directory when switching from SDMA updates to CPU updates.
+ *
+ * Returns 0 for success, -errno for errors.
+ */
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+        bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
+        int r;
+
+        r = amdgpu_bo_reserve(vm->root.base.bo, true);
+        if (r)
+                return r;
+
+        /* Sanity checks */
+        if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
+                r = -EINVAL;
+                goto error;
+        }
+
+        /* Check if PD needs to be reinitialized and do it before
+         * changing any other state, in case it fails.
+         */
+        if (pte_support_ats != vm->pte_support_ats) {
+                r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+                                       adev->vm_manager.root_level,
+                                       pte_support_ats);
+                if (r)
+                        goto error;
+        }
+
+        /* Update VM state */
+        vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+                                    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+        vm->pte_support_ats = pte_support_ats;
+        DRM_DEBUG_DRIVER("VM update mode is %s\n",
+                         vm->use_cpu_for_update ? "CPU" : "SDMA");
+        WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+                  "CPU update of VM recommended only for large BAR system\n");
+
+        if (vm->pasid) {
+                unsigned long flags;
+
+                spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+                idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
+                spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+
+                vm->pasid = 0;
+        }
+
+error:
+        amdgpu_bo_unreserve(vm->root.base.bo);
+        return r;
+}
+
+/**
  * amdgpu_vm_free_levels - free PD/PT levels
  *
  * @adev: amdgpu device structure
@@ -2405,11 +2570,13 @@ static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
         struct amdgpu_bo_va_mapping *mapping, *tmp;
-        bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
+        bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
         struct amdgpu_bo *root;
         u64 fault;
         int i, r;
 
+        amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
+
         /* Clear pending page faults from IH when the VM is destroyed */
         while (kfifo_get(&vm->faults, &fault))
                 amdgpu_ih_clear_fault(adev, fault);