about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
author Christian König <christian.koenig@amd.com> 2019-01-30 10:07:29 -0500
committer Alex Deucher <alexander.deucher@amd.com> 2019-03-19 16:36:48 -0400
commit 0ce15d6f7d3fb1162fd7de2829dbdf6d63a6a02a (patch)
tree c83a9a486a58d392808ea1453a7f5fa8b689d352 /drivers/gpu/drm/amd/amdgpu
parent 780637cbdf8fd614cc85a01c6c810d9d28902a59 (diff)
drm/amdgpu: allocate VM PDs/PTs on demand
Let's start to allocate VM PDs/PTs on demand instead of pre-allocating them during mapping.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 10
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 9
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 10
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 136
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 3
5 files changed, 39 insertions, 129 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 4e96ad84efaa..314c048fcac6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
410 if (p_bo_va_entry) 410 if (p_bo_va_entry)
411 *p_bo_va_entry = bo_va_entry; 411 *p_bo_va_entry = bo_va_entry;
412 412
413 /* Allocate new page tables if needed and validate 413 /* Allocate validate page tables if needed */
414 * them.
415 */
416 ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
417 if (ret) {
418 pr_err("Failed to allocate pts, err=%d\n", ret);
419 goto err_alloc_pts;
420 }
421
422 ret = vm_validate_pt_pd_bos(vm); 414 ret = vm_validate_pt_pd_bos(vm);
423 if (ret) { 415 if (ret) {
424 pr_err("validate_pt_pd_bos() failed\n"); 416 pr_err("validate_pt_pd_bos() failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 7e22be7ca68a..54dd02a898b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
92 return -ENOMEM; 92 return -ENOMEM;
93 } 93 }
94 94
95 r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
96 size);
97 if (r) {
98 DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
99 amdgpu_vm_bo_rmv(adev, *bo_va);
100 ttm_eu_backoff_reservation(&ticket, &list);
101 return r;
102 }
103
104 r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, 95 r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
105 AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | 96 AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
106 AMDGPU_PTE_EXECUTABLE); 97 AMDGPU_PTE_EXECUTABLE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 555285e329ed..fcaaac30e84b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
625 625
626 switch (args->operation) { 626 switch (args->operation) {
627 case AMDGPU_VA_OP_MAP: 627 case AMDGPU_VA_OP_MAP:
628 r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
629 args->map_size);
630 if (r)
631 goto error_backoff;
632
633 va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); 628 va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
634 r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, 629 r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
635 args->offset_in_bo, args->map_size, 630 args->offset_in_bo, args->map_size,
@@ -645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
645 args->map_size); 640 args->map_size);
646 break; 641 break;
647 case AMDGPU_VA_OP_REPLACE: 642 case AMDGPU_VA_OP_REPLACE:
648 r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
649 args->map_size);
650 if (r)
651 goto error_backoff;
652
653 va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); 643 va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
654 r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address, 644 r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
655 args->offset_in_bo, args->map_size, 645 args->offset_in_bo, args->map_size,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 90c6970e080f..b25be87eb412 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -521,47 +521,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
521} 521}
522 522
523/** 523/**
524 * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
525 *
526 * @adev: amdgpu_device pointer
527 * @vm: amdgpu_vm structure
528 * @start: start addr of the walk
529 * @cursor: state to initialize
530 *
531 * Start a walk and go directly to the leaf node.
532 */
533static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
534 struct amdgpu_vm *vm, uint64_t start,
535 struct amdgpu_vm_pt_cursor *cursor)
536{
537 amdgpu_vm_pt_start(adev, vm, start, cursor);
538 while (amdgpu_vm_pt_descendant(adev, cursor));
539}
540
541/**
542 * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
543 *
544 * @adev: amdgpu_device pointer
545 * @cursor: current state
546 *
547 * Walk the PD/PT tree to the next leaf node.
548 */
549static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
550 struct amdgpu_vm_pt_cursor *cursor)
551{
552 amdgpu_vm_pt_next(adev, cursor);
553 if (cursor->pfn != ~0ll)
554 while (amdgpu_vm_pt_descendant(adev, cursor));
555}
556
557/**
558 * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy
559 */
560#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor) \
561 for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor)); \
562 (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))
563
564/**
565 * amdgpu_vm_pt_first_dfs - start a deep first search 524 * amdgpu_vm_pt_first_dfs - start a deep first search
566 * 525 *
567 * @adev: amdgpu_device structure 526 * @adev: amdgpu_device structure
@@ -932,74 +891,51 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
932 * Returns: 891 * Returns:
933 * 0 on success, errno otherwise. 892 * 0 on success, errno otherwise.
934 */ 893 */
935int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, 894static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
936 struct amdgpu_vm *vm, 895 struct amdgpu_vm *vm,
937 uint64_t saddr, uint64_t size) 896 struct amdgpu_vm_pt_cursor *cursor)
938{ 897{
939 struct amdgpu_vm_pt_cursor cursor; 898 struct amdgpu_vm_pt *entry = cursor->entry;
899 struct amdgpu_bo_param bp;
940 struct amdgpu_bo *pt; 900 struct amdgpu_bo *pt;
941 uint64_t eaddr;
942 int r; 901 int r;
943 902
944 /* validate the parameters */ 903 if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
945 if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK) 904 unsigned num_entries;
946 return -EINVAL;
947 905
948 eaddr = saddr + size - 1; 906 num_entries = amdgpu_vm_num_entries(adev, cursor->level);
949 907 entry->entries = kvmalloc_array(num_entries,
950 saddr /= AMDGPU_GPU_PAGE_SIZE; 908 sizeof(*entry->entries),
951 eaddr /= AMDGPU_GPU_PAGE_SIZE; 909 GFP_KERNEL | __GFP_ZERO);
952 910 if (!entry->entries)
953 if (eaddr >= adev->vm_manager.max_pfn) { 911 return -ENOMEM;
954 dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
955 eaddr, adev->vm_manager.max_pfn);
956 return -EINVAL;
957 } 912 }
958 913
959 for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) { 914 if (entry->base.bo)
960 struct amdgpu_vm_pt *entry = cursor.entry; 915 return 0;
961 struct amdgpu_bo_param bp;
962
963 if (cursor.level < AMDGPU_VM_PTB) {
964 unsigned num_entries;
965
966 num_entries = amdgpu_vm_num_entries(adev, cursor.level);
967 entry->entries = kvmalloc_array(num_entries,
968 sizeof(*entry->entries),
969 GFP_KERNEL |
970 __GFP_ZERO);
971 if (!entry->entries)
972 return -ENOMEM;
973 }
974
975
976 if (entry->base.bo)
977 continue;
978
979 amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
980
981 r = amdgpu_bo_create(adev, &bp, &pt);
982 if (r)
983 return r;
984
985 if (vm->use_cpu_for_update) {
986 r = amdgpu_bo_kmap(pt, NULL);
987 if (r)
988 goto error_free_pt;
989 }
990 916
991 /* Keep a reference to the root directory to avoid 917 amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
992 * freeing them up in the wrong order.
993 */
994 pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
995 918
996 amdgpu_vm_bo_base_init(&entry->base, vm, pt); 919 r = amdgpu_bo_create(adev, &bp, &pt);
920 if (r)
921 return r;
997 922
998 r = amdgpu_vm_clear_bo(adev, vm, pt); 923 if (vm->use_cpu_for_update) {
924 r = amdgpu_bo_kmap(pt, NULL);
999 if (r) 925 if (r)
1000 goto error_free_pt; 926 goto error_free_pt;
1001 } 927 }
1002 928
929 /* Keep a reference to the root directory to avoid
930 * freeing them up in the wrong order.
931 */
932 pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
933 amdgpu_vm_bo_base_init(&entry->base, vm, pt);
934
935 r = amdgpu_vm_clear_bo(adev, vm, pt);
936 if (r)
937 goto error_free_pt;
938
1003 return 0; 939 return 0;
1004 940
1005error_free_pt: 941error_free_pt:
@@ -1644,6 +1580,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1644 struct amdgpu_vm_pt_cursor cursor; 1580 struct amdgpu_vm_pt_cursor cursor;
1645 uint64_t frag_start = start, frag_end; 1581 uint64_t frag_start = start, frag_end;
1646 unsigned int frag; 1582 unsigned int frag;
1583 int r;
1647 1584
1648 /* figure out the initial fragment */ 1585 /* figure out the initial fragment */
1649 amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); 1586 amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
@@ -1651,12 +1588,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1651 /* walk over the address space and update the PTs */ 1588 /* walk over the address space and update the PTs */
1652 amdgpu_vm_pt_start(adev, params->vm, start, &cursor); 1589 amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
1653 while (cursor.pfn < end) { 1590 while (cursor.pfn < end) {
1654 struct amdgpu_bo *pt = cursor.entry->base.bo;
1655 unsigned shift, parent_shift, mask; 1591 unsigned shift, parent_shift, mask;
1656 uint64_t incr, entry_end, pe_start; 1592 uint64_t incr, entry_end, pe_start;
1593 struct amdgpu_bo *pt;
1657 1594
1658 if (!pt) 1595 r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
1659 return -ENOENT; 1596 if (r)
1597 return r;
1598
1599 pt = cursor.entry->base.bo;
1660 1600
1661 /* The root level can't be a huge page */ 1601 /* The root level can't be a huge page */
1662 if (cursor.level == adev->vm_manager.root_level) { 1602 if (cursor.level == adev->vm_manager.root_level) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 81ff8177f092..116605c038d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
303int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, 303int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
304 int (*callback)(void *p, struct amdgpu_bo *bo), 304 int (*callback)(void *p, struct amdgpu_bo *bo),
305 void *param); 305 void *param);
306int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
307 struct amdgpu_vm *vm,
308 uint64_t saddr, uint64_t size);
309int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); 306int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
310int amdgpu_vm_update_directories(struct amdgpu_device *adev, 307int amdgpu_vm_update_directories(struct amdgpu_device *adev,
311 struct amdgpu_vm *vm); 308 struct amdgpu_vm *vm);