author		Christian König <christian.koenig@amd.com>	2017-08-09 08:15:46 -0400
committer	Alex Deucher <alexander.deucher@amd.com>	2017-08-31 13:45:24 -0400
commit		ea09729c930223edf492d0ca647c27e7eb0ccb12 (patch)
tree		7af83967d9e12cb910cce6177163b9de84f4c4fc	/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent		ebe02de2c60caa3ee5a1b39c7c8b2a40e1fda2d8 (diff)
drm/amdgpu: rework page directory filling v2
Keep track of relocated PDs/PTs instead of walking and checking all PDs.

v2: fix root PD handling

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c	89
1 file changed, 60 insertions, 29 deletions
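The sketch below is a minimal, standalone illustration of the bookkeeping pattern this patch switches to: page tables are queued on a per-VM "relocated" list when they change, and the directory update drains that list instead of recursively visiting every level. It is plain C, not the amdgpu API; all names here (my_vm, my_pt, my_vm_mark_relocated, my_vm_update_directories) are invented for the example.

/*
 * Hypothetical sketch of the "relocated list" pattern (not amdgpu code):
 * queue changed page tables once, then update only what was queued.
 */
#include <stdbool.h>
#include <stdio.h>

#define MAX_PT 8

struct my_pt {
	int id;
	bool on_relocated;		/* already queued for a directory update? */
	struct my_pt *next;		/* link in the singly linked "relocated" list */
};

struct my_vm {
	struct my_pt pts[MAX_PT];
	struct my_pt *relocated;	/* head of the dirty list */
};

/* Queue a page table once; repeated moves must not double-insert it. */
static void my_vm_mark_relocated(struct my_vm *vm, struct my_pt *pt)
{
	if (pt->on_relocated)
		return;
	pt->on_relocated = true;
	pt->next = vm->relocated;
	vm->relocated = pt;
}

/* Drain the list: only queued entries get their parent directory rewritten. */
static void my_vm_update_directories(struct my_vm *vm)
{
	while (vm->relocated) {
		struct my_pt *pt = vm->relocated;

		vm->relocated = pt->next;
		pt->on_relocated = false;
		printf("rewrite PDE for page table %d\n", pt->id);
	}
}

int main(void)
{
	static struct my_vm vm;
	int i;

	for (i = 0; i < MAX_PT; i++)
		vm.pts[i].id = i;

	/* Pretend only two page tables moved since the last submission. */
	my_vm_mark_relocated(&vm, &vm.pts[2]);
	my_vm_mark_relocated(&vm, &vm.pts[5]);
	my_vm_mark_relocated(&vm, &vm.pts[5]);	/* second call is a no-op */

	my_vm_update_directories(&vm);		/* touches 2 entries, not 8 */
	return 0;
}

The design point is the same as in the patch: the cost of filling the page directories becomes proportional to the number of entries that actually changed rather than to the size of the whole tree.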
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6ff3c1bf035e..faa08d5728da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -196,7 +196,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		}
 
 		spin_lock(&vm->status_lock);
-		list_del_init(&bo_base->vm_status);
+		list_move(&bo_base->vm_status, &vm->relocated);
 	}
 	spin_unlock(&vm->status_lock);
 
@@ -314,8 +314,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			entry->base.vm = vm;
 			entry->base.bo = pt;
 			list_add_tail(&entry->base.bo_list, &pt->va);
-			INIT_LIST_HEAD(&entry->base.vm_status);
-			entry->addr = 0;
+			spin_lock(&vm->status_lock);
+			list_add(&entry->base.vm_status, &vm->relocated);
+			spin_unlock(&vm->status_lock);
+			entry->addr = ~0ULL;
 		}
 
 		if (level < adev->vm_manager.num_level) {
@@ -1000,18 +1002,17 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  */
 static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm,
-				  struct amdgpu_vm_pt *parent,
-				  unsigned level)
+				  struct amdgpu_vm_pt *parent)
 {
 	struct amdgpu_bo *shadow;
 	struct amdgpu_ring *ring = NULL;
 	uint64_t pd_addr, shadow_addr = 0;
-	uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
 	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
 	unsigned count = 0, pt_idx, ndw = 0;
 	struct amdgpu_job *job;
 	struct amdgpu_pte_update_params params;
 	struct dma_fence *fence = NULL;
+	uint32_t incr;
 
 	int r;
 
@@ -1059,12 +1060,17 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 	/* walk over the address space and update the directory */
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_bo *bo = parent->entries[pt_idx].base.bo;
+		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
+		struct amdgpu_bo *bo = entry->base.bo;
 		uint64_t pde, pt;
 
 		if (bo == NULL)
 			continue;
 
+		spin_lock(&vm->status_lock);
+		list_del_init(&entry->base.vm_status);
+		spin_unlock(&vm->status_lock);
+
 		pt = amdgpu_bo_gpu_offset(bo);
 		pt = amdgpu_gart_get_vm_pde(adev, pt);
 		/* Don't update huge pages here */
@@ -1075,6 +1081,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
 
 		pde = pd_addr + pt_idx * 8;
+		incr = amdgpu_bo_size(bo);
 		if (((last_pde + 8 * count) != pde) ||
 		    ((last_pt + incr * count) != pt) ||
 		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
@@ -1135,20 +1142,6 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			dma_fence_put(fence);
 		}
 	}
-	/*
-	 * Recurse into the subdirectories. This recursion is harmless because
-	 * we only have a maximum of 5 layers.
-	 */
-	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-
-		if (!entry->base.bo)
-			continue;
-
-		r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
-		if (r)
-			return r;
-	}
 
 	return 0;
 
@@ -1164,7 +1157,8 @@ error_free:
  *
  * Mark all PD level as invalid after an error.
  */
-static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
+static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
+				       struct amdgpu_vm_pt *parent)
 {
 	unsigned pt_idx;
 
@@ -1179,7 +1173,10 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
 			continue;
 
 		entry->addr = ~0ULL;
-		amdgpu_vm_invalidate_level(entry);
+		spin_lock(&vm->status_lock);
+		list_move(&entry->base.vm_status, &vm->relocated);
+		spin_unlock(&vm->status_lock);
+		amdgpu_vm_invalidate_level(vm, entry);
 	}
 }
 
@@ -1197,9 +1194,38 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 {
 	int r;
 
-	r = amdgpu_vm_update_level(adev, vm, &vm->root, 0);
-	if (r)
-		amdgpu_vm_invalidate_level(&vm->root);
+	spin_lock(&vm->status_lock);
+	while (!list_empty(&vm->relocated)) {
+		struct amdgpu_vm_bo_base *bo_base;
+		struct amdgpu_bo *bo;
+
+		bo_base = list_first_entry(&vm->relocated,
+					   struct amdgpu_vm_bo_base,
+					   vm_status);
+		spin_unlock(&vm->status_lock);
+
+		bo = bo_base->bo->parent;
+		if (bo) {
+			struct amdgpu_vm_bo_base *parent;
+			struct amdgpu_vm_pt *pt;
+
+			parent = list_first_entry(&bo->va,
+						  struct amdgpu_vm_bo_base,
+						  bo_list);
+			pt = container_of(parent, struct amdgpu_vm_pt, base);
+
+			r = amdgpu_vm_update_level(adev, vm, pt);
+			if (r) {
+				amdgpu_vm_invalidate_level(vm, &vm->root);
+				return r;
+			}
+			spin_lock(&vm->status_lock);
+		} else {
+			spin_lock(&vm->status_lock);
+			list_del_init(&bo_base->vm_status);
+		}
+	}
+	spin_unlock(&vm->status_lock);
 
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
@@ -1601,7 +1627,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
 	amdgpu_job_free(job);
-	amdgpu_vm_invalidate_level(&vm->root);
+	amdgpu_vm_invalidate_level(vm, &vm->root);
 	return r;
 }
 
@@ -2391,9 +2417,13 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 			continue;
 		}
 
-		/* Don't add page tables to the moved state */
-		if (bo->tbo.type == ttm_bo_type_kernel)
+		if (bo->tbo.type == ttm_bo_type_kernel) {
+			spin_lock(&bo_base->vm->status_lock);
+			if (list_empty(&bo_base->vm_status))
+				list_add(&bo_base->vm_status, &vm->relocated);
+			spin_unlock(&bo_base->vm->status_lock);
 			continue;
+		}
 
 		spin_lock(&bo_base->vm->status_lock);
 		list_move(&bo_base->vm_status, &bo_base->vm->moved);
@@ -2483,6 +2513,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->evicted);
+	INIT_LIST_HEAD(&vm->relocated);
 	INIT_LIST_HEAD(&vm->moved);
 	INIT_LIST_HEAD(&vm->freed);
 