Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  399
1 file changed, 283 insertions(+), 116 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5afbc5e714d0..da55a78d7380 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -32,6 +32,7 @@
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 
 /*
  * GPUVM
@@ -75,7 +76,8 @@ struct amdgpu_pte_update_params {
 	/* indirect buffer to fill with commands */
 	struct amdgpu_ib *ib;
 	/* Function which actually does the update */
-	void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
+	void (*func)(struct amdgpu_pte_update_params *params,
+		     struct amdgpu_bo *bo, uint64_t pe,
 		     uint64_t addr, unsigned count, uint32_t incr,
 		     uint64_t flags);
 	/* The next two are used during VM update by CPU
@@ -257,6 +259,104 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 }
 
 /**
+ * amdgpu_vm_clear_bo - initially clear the PDs/PTs
+ *
+ * @adev: amdgpu_device pointer
+ * @bo: BO to clear
+ * @level: level this BO is at
+ *
+ * Root PD needs to be reserved when calling this.
+ */
+static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
+			      struct amdgpu_vm *vm, struct amdgpu_bo *bo,
+			      unsigned level, bool pte_support_ats)
+{
+	struct ttm_operation_ctx ctx = { true, false };
+	struct dma_fence *fence = NULL;
+	unsigned entries, ats_entries;
+	struct amdgpu_ring *ring;
+	struct amdgpu_job *job;
+	uint64_t addr;
+	int r;
+
+	addr = amdgpu_bo_gpu_offset(bo);
+	entries = amdgpu_bo_size(bo) / 8;
+
+	if (pte_support_ats) {
+		if (level == adev->vm_manager.root_level) {
+			ats_entries = amdgpu_vm_level_shift(adev, level);
+			ats_entries += AMDGPU_GPU_PAGE_SHIFT;
+			ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
+			ats_entries = min(ats_entries, entries);
+			entries -= ats_entries;
+		} else {
+			ats_entries = entries;
+			entries = 0;
+		}
+	} else {
+		ats_entries = 0;
+	}
+
+	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
+	r = reservation_object_reserve_shared(bo->tbo.resv);
+	if (r)
+		return r;
+
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (r)
+		goto error;
+
+	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+	if (r)
+		goto error;
+
+	if (ats_entries) {
+		uint64_t ats_value;
+
+		ats_value = AMDGPU_PTE_DEFAULT_ATC;
+		if (level != AMDGPU_VM_PTB)
+			ats_value |= AMDGPU_PDE_PTE;
+
+		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+				      ats_entries, 0, ats_value);
+		addr += ats_entries * 8;
+	}
+
+	if (entries)
+		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+				      entries, 0, 0);
+
+	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+
+	WARN_ON(job->ibs[0].length_dw > 64);
+	r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
+			     AMDGPU_FENCE_OWNER_UNDEFINED, false);
+	if (r)
+		goto error_free;
+
+	r = amdgpu_job_submit(job, ring, &vm->entity,
+			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+	if (r)
+		goto error_free;
+
+	amdgpu_bo_fence(bo, fence, true);
+	dma_fence_put(fence);
+
+	if (bo->shadow)
+		return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
+					  level, pte_support_ats);
+
+	return 0;
+
+error_free:
+	amdgpu_job_free(job);
+
+error:
+	return r;
+}
+
+/**
  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
  *
  * @adev: amdgpu_device pointer
@@ -270,13 +370,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm,
 				  struct amdgpu_vm_pt *parent,
 				  uint64_t saddr, uint64_t eaddr,
-				  unsigned level)
+				  unsigned level, bool ats)
 {
 	unsigned shift = amdgpu_vm_level_shift(adev, level);
 	unsigned pt_idx, from, to;
-	int r;
 	u64 flags;
-	uint64_t init_value = 0;
+	int r;
 
 	if (!parent->entries) {
 		unsigned num_entries = amdgpu_vm_num_entries(adev, level);
@@ -299,21 +398,13 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	saddr = saddr & ((1 << shift) - 1);
 	eaddr = eaddr & ((1 << shift) - 1);
 
-	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-			AMDGPU_GEM_CREATE_VRAM_CLEARED;
+	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	if (vm->use_cpu_for_update)
 		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 	else
 		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 			  AMDGPU_GEM_CREATE_SHADOW);
 
-	if (vm->pte_support_ats) {
-		init_value = AMDGPU_PTE_DEFAULT_ATC;
-		if (level != AMDGPU_VM_PTB)
-			init_value |= AMDGPU_PDE_PTE;
-
-	}
-
 	/* walk over the address space and allocate the page tables */
 	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
 		struct reservation_object *resv = vm->root.base.bo->tbo.resv;
@@ -323,16 +414,23 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 		if (!entry->base.bo) {
 			r = amdgpu_bo_create(adev,
 					     amdgpu_vm_bo_size(adev, level),
-					     AMDGPU_GPU_PAGE_SIZE, true,
-					     AMDGPU_GEM_DOMAIN_VRAM,
-					     flags,
-					     NULL, resv, init_value, &pt);
+					     AMDGPU_GPU_PAGE_SIZE,
+					     AMDGPU_GEM_DOMAIN_VRAM, flags,
+					     ttm_bo_type_kernel, resv, &pt);
 			if (r)
 				return r;
 
+			r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
+			if (r) {
+				amdgpu_bo_unref(&pt->shadow);
+				amdgpu_bo_unref(&pt);
+				return r;
+			}
+
 			if (vm->use_cpu_for_update) {
 				r = amdgpu_bo_kmap(pt, NULL);
 				if (r) {
+					amdgpu_bo_unref(&pt->shadow);
 					amdgpu_bo_unref(&pt);
 					return r;
 				}
@@ -356,7 +454,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
 				((1 << shift) - 1);
 			r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
-						   sub_eaddr, level);
+						   sub_eaddr, level, ats);
 			if (r)
 				return r;
 		}
@@ -379,26 +477,29 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 			struct amdgpu_vm *vm,
 			uint64_t saddr, uint64_t size)
 {
-	uint64_t last_pfn;
 	uint64_t eaddr;
+	bool ats = false;
 
 	/* validate the parameters */
 	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
 		return -EINVAL;
 
 	eaddr = saddr + size - 1;
-	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
-	if (last_pfn >= adev->vm_manager.max_pfn) {
-		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
-			last_pfn, adev->vm_manager.max_pfn);
-		return -EINVAL;
-	}
+
+	if (vm->pte_support_ats)
+		ats = saddr < AMDGPU_VA_HOLE_START;
 
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
+	if (eaddr >= adev->vm_manager.max_pfn) {
+		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
+			eaddr, adev->vm_manager.max_pfn);
+		return -EINVAL;
+	}
+
 	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
-				      adev->vm_manager.root_level);
+				      adev->vm_manager.root_level, ats);
 }
 
 /**
@@ -465,7 +566,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 
 static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
 {
-	return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
+	return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
 }
 
 /**
@@ -491,14 +592,24 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 	    id->oa_base != job->oa_base ||
 	    id->oa_size != job->oa_size);
 	bool vm_flush_needed = job->vm_needs_flush;
+	bool pasid_mapping_needed = id->pasid != job->pasid ||
+		!id->pasid_mapping ||
+		!dma_fence_is_signaled(id->pasid_mapping);
+	struct dma_fence *fence = NULL;
 	unsigned patch_offset = 0;
 	int r;
 
 	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
 		gds_switch_needed = true;
 		vm_flush_needed = true;
+		pasid_mapping_needed = true;
 	}
 
+	gds_switch_needed &= !!ring->funcs->emit_gds_switch;
+	vm_flush_needed &= !!ring->funcs->emit_vm_flush;
+	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
+		ring->funcs->emit_wreg;
+
 	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
 		return 0;
 
@@ -508,23 +619,36 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 	if (need_pipe_sync)
 		amdgpu_ring_emit_pipeline_sync(ring);
 
-	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
-		struct dma_fence *fence;
-
+	if (vm_flush_needed) {
 		trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
 		amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
+	}
+
+	if (pasid_mapping_needed)
+		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
 
+	if (vm_flush_needed || pasid_mapping_needed) {
 		r = amdgpu_fence_emit(ring, &fence);
 		if (r)
 			return r;
+	}
 
+	if (vm_flush_needed) {
 		mutex_lock(&id_mgr->lock);
 		dma_fence_put(id->last_flush);
-		id->last_flush = fence;
-		id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
+		id->last_flush = dma_fence_get(fence);
+		id->current_gpu_reset_count =
+			atomic_read(&adev->gpu_reset_counter);
 		mutex_unlock(&id_mgr->lock);
 	}
 
+	if (pasid_mapping_needed) {
+		id->pasid = job->pasid;
+		dma_fence_put(id->pasid_mapping);
+		id->pasid_mapping = dma_fence_get(fence);
+	}
+	dma_fence_put(fence);
+
 	if (ring->funcs->emit_gds_switch && gds_switch_needed) {
 		id->gds_base = job->gds_base;
 		id->gds_size = job->gds_size;
@@ -578,6 +702,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * amdgpu_vm_do_set_ptes - helper to call the right asic function
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -588,10 +713,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * to setup the page table using the DMA.
  */
 static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
+				  struct amdgpu_bo *bo,
 				  uint64_t pe, uint64_t addr,
 				  unsigned count, uint32_t incr,
 				  uint64_t flags)
 {
+	pe += amdgpu_bo_gpu_offset(bo);
 	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 
 	if (count < 3) {
@@ -608,6 +735,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
  * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -617,13 +745,14 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
  * Traces the parameters and calls the DMA function to copy the PTEs.
  */
 static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
+				   struct amdgpu_bo *bo,
 				   uint64_t pe, uint64_t addr,
 				   unsigned count, uint32_t incr,
 				   uint64_t flags)
 {
 	uint64_t src = (params->src + (addr >> 12) * 8);
 
-
+	pe += amdgpu_bo_gpu_offset(bo);
 	trace_amdgpu_vm_copy_ptes(pe, src, count);
 
 	amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
@@ -657,6 +786,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
  * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: kmap addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -666,6 +796,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
  * Write count number of PT/PD entries directly.
  */
 static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
+				   struct amdgpu_bo *bo,
 				   uint64_t pe, uint64_t addr,
 				   unsigned count, uint32_t incr,
 				   uint64_t flags)
@@ -673,14 +804,16 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
 	unsigned int i;
 	uint64_t value;
 
+	pe += (unsigned long)amdgpu_bo_kptr(bo);
+
 	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 
 	for (i = 0; i < count; i++) {
 		value = params->pages_addr ?
 			amdgpu_vm_map_gart(params->pages_addr, addr) :
 			addr;
-		amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
+		amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
 					i, value, flags);
 		addr += incr;
 	}
 }
@@ -714,8 +847,7 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
 				 struct amdgpu_vm_pt *parent,
 				 struct amdgpu_vm_pt *entry)
 {
-	struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo;
-	uint64_t pd_addr, shadow_addr = 0;
+	struct amdgpu_bo *bo = parent->base.bo, *pbo;
 	uint64_t pde, pt, flags;
 	unsigned level;
 
@@ -723,29 +855,17 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
 	if (entry->huge)
 		return;
 
-	if (vm->use_cpu_for_update) {
-		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-	} else {
-		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-		shadow = parent->base.bo->shadow;
-		if (shadow)
-			shadow_addr = amdgpu_bo_gpu_offset(shadow);
-	}
-
-	for (level = 0, pbo = parent->base.bo->parent; pbo; ++level)
+	for (level = 0, pbo = bo->parent; pbo; ++level)
 		pbo = pbo->parent;
 
 	level += params->adev->vm_manager.root_level;
-	pt = amdgpu_bo_gpu_offset(bo);
+	pt = amdgpu_bo_gpu_offset(entry->base.bo);
 	flags = AMDGPU_PTE_VALID;
-	amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags);
-	if (shadow) {
-		pde = shadow_addr + (entry - parent->entries) * 8;
-		params->func(params, pde, pt, 1, 0, flags);
-	}
-
-	pde = pd_addr + (entry - parent->entries) * 8;
-	params->func(params, pde, pt, 1, 0, flags);
+	amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
+	pde = (entry - parent->entries) * 8;
+	if (bo->shadow)
+		params->func(params, bo->shadow, pde, pt, 1, 0, flags);
+	params->func(params, bo, pde, pt, 1, 0, flags);
 }
 
 /*
@@ -856,7 +976,7 @@ restart:
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
 		mb();
-		amdgpu_gart_flush_gpu_tlb(adev, 0);
+		amdgpu_asic_flush_hdp(adev, NULL);
 	} else if (params.ib->length_dw == 0) {
 		amdgpu_job_free(job);
 	} else {
@@ -870,11 +990,6 @@ restart:
 		amdgpu_ring_pad_ib(ring, params.ib);
 		amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
 				 AMDGPU_FENCE_OWNER_VM, false);
-		if (root->shadow)
-			amdgpu_sync_resv(adev, &job->sync,
-					 root->shadow->tbo.resv,
-					 AMDGPU_FENCE_OWNER_VM, false);
-
 		WARN_ON(params.ib->length_dw > ndw);
 		r = amdgpu_job_submit(job, ring, &vm->entity,
 				      AMDGPU_FENCE_OWNER_VM, &fence);
@@ -946,7 +1061,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 					unsigned nptes, uint64_t dst,
 					uint64_t flags)
 {
-	uint64_t pd_addr, pde;
+	uint64_t pde;
 
 	/* In the case of a mixed PT the PDE must point to it*/
 	if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
@@ -967,21 +1082,12 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 	}
 
 	entry->huge = true;
-	amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0,
-			       &dst, &flags);
+	amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
 
-	if (p->func == amdgpu_vm_cpu_set_ptes) {
-		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-	} else {
-		if (parent->base.bo->shadow) {
-			pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
-			pde = pd_addr + (entry - parent->entries) * 8;
-			p->func(p, pde, dst, 1, 0, flags);
-		}
-		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-	}
-	pde = pd_addr + (entry - parent->entries) * 8;
-	p->func(p, pde, dst, 1, 0, flags);
+	pde = (entry - parent->entries) * 8;
+	if (parent->base.bo->shadow)
+		p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
+	p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
 }
 
 /**
@@ -1007,7 +1113,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	uint64_t addr, pe_start;
 	struct amdgpu_bo *pt;
 	unsigned nptes;
-	bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
 
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; addr += nptes,
@@ -1030,20 +1135,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 			continue;
 
 		pt = entry->base.bo;
-		if (use_cpu_update) {
-			pe_start = (unsigned long)amdgpu_bo_kptr(pt);
-		} else {
-			if (pt->shadow) {
-				pe_start = amdgpu_bo_gpu_offset(pt->shadow);
-				pe_start += (addr & mask) * 8;
-				params->func(params, pe_start, dst, nptes,
-					     AMDGPU_GPU_PAGE_SIZE, flags);
-			}
-			pe_start = amdgpu_bo_gpu_offset(pt);
-		}
-
-		pe_start += (addr & mask) * 8;
-		params->func(params, pe_start, dst, nptes,
+		pe_start = (addr & mask) * 8;
+		if (pt->shadow)
+			params->func(params, pt->shadow, pe_start, dst, nptes,
+				     AMDGPU_GPU_PAGE_SIZE, flags);
+		params->func(params, pt, pe_start, dst, nptes,
 			     AMDGPU_GPU_PAGE_SIZE, flags);
 	}
 
@@ -1204,11 +1300,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	} else {
 		/* set page commands needed */
-		ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
+		ndw += ncmds * 10;
 
 		/* extra commands for begin/end fragments */
-		ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw
-			* adev->vm_manager.fragment_size;
+		ndw += 2 * 10 * adev->vm_manager.fragment_size;
 
 		params.func = amdgpu_vm_do_set_ptes;
 	}
@@ -1457,7 +1552,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
 		mb();
-		amdgpu_gart_flush_gpu_tlb(adev, 0);
+		amdgpu_asic_flush_hdp(adev, NULL);
 	}
 
 	spin_lock(&vm->status_lock);
@@ -1485,7 +1580,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
 
 	spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
 	enable = !!atomic_read(&adev->vm_manager.num_prt_users);
-	adev->gart.gart_funcs->set_prt(adev, enable);
+	adev->gmc.gmc_funcs->set_prt(adev, enable);
 	spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
 }
 
@@ -1494,7 +1589,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
  */
 static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
 {
-	if (!adev->gart.gart_funcs->set_prt)
+	if (!adev->gmc.gmc_funcs->set_prt)
 		return;
 
 	if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
@@ -1529,7 +1624,7 @@ static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
 {
 	struct amdgpu_prt_cb *cb;
 
-	if (!adev->gart.gart_funcs->set_prt)
+	if (!adev->gmc.gmc_funcs->set_prt)
 		return;
 
 	cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
@@ -1623,16 +1718,16 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct dma_fence **fence)
 {
 	struct amdgpu_bo_va_mapping *mapping;
+	uint64_t init_pte_value = 0;
 	struct dma_fence *f = NULL;
 	int r;
-	uint64_t init_pte_value = 0;
 
 	while (!list_empty(&vm->freed)) {
 		mapping = list_first_entry(&vm->freed,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
 
-		if (vm->pte_support_ats)
+		if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
 			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
 		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
@@ -2262,11 +2357,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 				   AMDGPU_VM_PTE_COUNT(adev) * 8);
-	uint64_t init_pde_value = 0, flags;
 	unsigned ring_instance;
 	struct amdgpu_ring *ring;
 	struct drm_sched_rq *rq;
 	unsigned long size;
+	uint64_t flags;
 	int r, i;
 
 	vm->va = RB_ROOT_CACHED;
@@ -2295,33 +2390,27 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
 						AMDGPU_VM_USE_CPU_FOR_COMPUTE);
 
-		if (adev->asic_type == CHIP_RAVEN) {
+		if (adev->asic_type == CHIP_RAVEN)
 			vm->pte_support_ats = true;
-			init_pde_value = AMDGPU_PTE_DEFAULT_ATC
-					| AMDGPU_PDE_PTE;
-
-		}
-	} else
+	} else {
 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
 						AMDGPU_VM_USE_CPU_FOR_GFX);
+	}
 	DRM_DEBUG_DRIVER("VM update mode is %s\n",
 			 vm->use_cpu_for_update ? "CPU" : "SDMA");
 	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
 		  "CPU update of VM recommended only for large BAR system\n");
 	vm->last_update = NULL;
 
-	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-			AMDGPU_GEM_CREATE_VRAM_CLEARED;
+	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	if (vm->use_cpu_for_update)
 		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 	else
-		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-				AMDGPU_GEM_CREATE_SHADOW);
+		flags |= AMDGPU_GEM_CREATE_SHADOW;
 
 	size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
-	r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM,
-			     flags, NULL, NULL, init_pde_value,
-			     &vm->root.base.bo);
+	r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
+			     ttm_bo_type_kernel, NULL, &vm->root.base.bo);
 	if (r)
 		goto error_free_sched_entity;
 
@@ -2329,6 +2418,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (r)
 		goto error_free_root;
 
+	r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+			       adev->vm_manager.root_level,
+			       vm->pte_support_ats);
+	if (r)
+		goto error_unreserve;
+
 	vm->root.base.vm = vm;
 	list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
 	list_add_tail(&vm->root.base.vm_status, &vm->evicted);
@@ -2352,6 +2447,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	return 0;
 
+error_unreserve:
+	amdgpu_bo_unreserve(vm->root.base.bo);
+
 error_free_root:
 	amdgpu_bo_unref(&vm->root.base.bo->shadow);
 	amdgpu_bo_unref(&vm->root.base.bo);
@@ -2364,6 +2462,73 @@ error_free_sched_entity:
 }
 
 /**
+ * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
+ *
+ * This only works on GFX VMs that don't have any BOs added and no
+ * page tables allocated yet.
+ *
+ * Changes the following VM parameters:
+ * - use_cpu_for_update
+ * - pte_supports_ats
+ * - pasid (old PASID is released, because compute manages its own PASIDs)
+ *
+ * Reinitializes the page directory to reflect the changed ATS
+ * setting. May leave behind an unused shadow BO for the page
+ * directory when switching from SDMA updates to CPU updates.
+ *
+ * Returns 0 for success, -errno for errors.
+ */
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+	bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
+	int r;
+
+	r = amdgpu_bo_reserve(vm->root.base.bo, true);
+	if (r)
+		return r;
+
+	/* Sanity checks */
+	if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
+		r = -EINVAL;
+		goto error;
+	}
+
+	/* Check if PD needs to be reinitialized and do it before
+	 * changing any other state, in case it fails.
+	 */
+	if (pte_support_ats != vm->pte_support_ats) {
+		r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+				       adev->vm_manager.root_level,
+				       pte_support_ats);
+		if (r)
+			goto error;
+	}
+
+	/* Update VM state */
+	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+				    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+	vm->pte_support_ats = pte_support_ats;
+	DRM_DEBUG_DRIVER("VM update mode is %s\n",
+			 vm->use_cpu_for_update ? "CPU" : "SDMA");
+	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+		  "CPU update of VM recommended only for large BAR system\n");
+
+	if (vm->pasid) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
+		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+
+		vm->pasid = 0;
+	}
+
+error:
+	amdgpu_bo_unreserve(vm->root.base.bo);
+	return r;
+}
+
+/**
  * amdgpu_vm_free_levels - free PD/PT levels
  *
  * @adev: amdgpu device structure
@@ -2405,11 +2570,13 @@ static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
-	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
+	bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
 	struct amdgpu_bo *root;
 	u64 fault;
 	int i, r;
 
+	amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
+
 	/* Clear pending page faults from IH when the VM is destroyed */
 	while (kfifo_get(&vm->faults, &fault))
 		amdgpu_ih_clear_fault(adev, fault);