author	Christian König <christian.koenig@amd.com>	2017-08-09 08:15:46 -0400
committer	Alex Deucher <alexander.deucher@amd.com>	2017-08-31 13:45:24 -0400
commit	ea09729c930223edf492d0ca647c27e7eb0ccb12 (patch)
tree	7af83967d9e12cb910cce6177163b9de84f4c4fc /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent	ebe02de2c60caa3ee5a1b39c7c8b2a40e1fda2d8 (diff)
drm/amdgpu: rework page directory filling v2
Keep track of relocated PDs/PTs instead of walking and checking all PDs.
v2: fix root PD handling
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
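
Before the diff, a minimal sketch of the idea may help: instead of recursively walking every page-directory level on each update, changed entries are queued on a "relocated" list and the update path drains only that list. The sketch below is plain userspace C under simplified assumptions; pd_entry, vm, pd_mark_relocated and vm_update_directories are illustrative stand-ins, not the kernel's types, and only the kernel-style list_head plumbing is borrowed.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Minimal circular doubly-linked list, mimicking the kernel's list_head. */
struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h) { h->prev = h->next = h; }
static bool list_empty(const struct list_head *h) { return h->next == h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

static void list_del_init(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	list_init(n);
}

/* Illustrative stand-ins for a page-directory entry and a VM. */
struct pd_entry {
	struct list_head vm_status; /* links into vm.relocated while dirty */
	int id;
};

struct vm {
	struct list_head relocated; /* only the entries that actually changed */
};

/* Marking is O(1): queue the entry if it is not already queued. */
static void pd_mark_relocated(struct vm *vm, struct pd_entry *e)
{
	if (list_empty(&e->vm_status))
		list_add_tail(&e->vm_status, &vm->relocated);
}

/* The update path drains the list instead of walking all PD levels. */
static void vm_update_directories(struct vm *vm)
{
	while (!list_empty(&vm->relocated)) {
		struct pd_entry *e = (struct pd_entry *)
			((char *)vm->relocated.next -
			 offsetof(struct pd_entry, vm_status));

		list_del_init(&e->vm_status);
		printf("updating PD entry %d\n", e->id); /* write the PDE */
	}
}

int main(void)
{
	struct vm vm;
	struct pd_entry a = { .id = 0 }, b = { .id = 1 };

	list_init(&vm.relocated);
	list_init(&a.vm_status);
	list_init(&b.vm_status);

	pd_mark_relocated(&vm, &a); /* only a and b are visited later, */
	pd_mark_relocated(&vm, &b); /* no matter how large the tree is */
	vm_update_directories(&vm);
	return 0;
}

In the patch itself this corresponds to the list_move()/list_add() calls onto vm->relocated in amdgpu_vm_validate_pt_bos(), amdgpu_vm_alloc_levels() and amdgpu_vm_bo_invalidate(), and the drain loop in amdgpu_vm_update_directories(); the real code additionally takes vm->status_lock around every list operation.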
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c	89
1 file changed, 60 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6ff3c1bf035e..faa08d5728da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -196,7 +196,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		}
 
 		spin_lock(&vm->status_lock);
-		list_del_init(&bo_base->vm_status);
+		list_move(&bo_base->vm_status, &vm->relocated);
 	}
 	spin_unlock(&vm->status_lock);
 
@@ -314,8 +314,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			entry->base.vm = vm;
 			entry->base.bo = pt;
 			list_add_tail(&entry->base.bo_list, &pt->va);
-			INIT_LIST_HEAD(&entry->base.vm_status);
-			entry->addr = 0;
+			spin_lock(&vm->status_lock);
+			list_add(&entry->base.vm_status, &vm->relocated);
+			spin_unlock(&vm->status_lock);
+			entry->addr = ~0ULL;
 		}
 
 		if (level < adev->vm_manager.num_level) {
@@ -1000,18 +1002,17 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  */
 static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm,
-				  struct amdgpu_vm_pt *parent,
-				  unsigned level)
+				  struct amdgpu_vm_pt *parent)
 {
 	struct amdgpu_bo *shadow;
 	struct amdgpu_ring *ring = NULL;
 	uint64_t pd_addr, shadow_addr = 0;
-	uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
 	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
 	unsigned count = 0, pt_idx, ndw = 0;
 	struct amdgpu_job *job;
 	struct amdgpu_pte_update_params params;
 	struct dma_fence *fence = NULL;
+	uint32_t incr;
 
 	int r;
 
@@ -1059,12 +1060,17 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 	/* walk over the address space and update the directory */
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_bo *bo = parent->entries[pt_idx].base.bo;
+		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
+		struct amdgpu_bo *bo = entry->base.bo;
 		uint64_t pde, pt;
 
 		if (bo == NULL)
 			continue;
 
+		spin_lock(&vm->status_lock);
+		list_del_init(&entry->base.vm_status);
+		spin_unlock(&vm->status_lock);
+
 		pt = amdgpu_bo_gpu_offset(bo);
 		pt = amdgpu_gart_get_vm_pde(adev, pt);
 		/* Don't update huge pages here */
@@ -1075,6 +1081,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
 
 		pde = pd_addr + pt_idx * 8;
+		incr = amdgpu_bo_size(bo);
 		if (((last_pde + 8 * count) != pde) ||
 		    ((last_pt + incr * count) != pt) ||
 		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
@@ -1135,20 +1142,6 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			dma_fence_put(fence);
 		}
 	}
-	/*
-	 * Recurse into the subdirectories. This recursion is harmless because
-	 * we only have a maximum of 5 layers.
-	 */
-	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-
-		if (!entry->base.bo)
-			continue;
-
-		r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
-		if (r)
-			return r;
-	}
 
 	return 0;
 
@@ -1164,7 +1157,8 @@ error_free:
  *
  * Mark all PD level as invalid after an error.
  */
-static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
+static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
+				       struct amdgpu_vm_pt *parent)
 {
 	unsigned pt_idx;
 
@@ -1179,7 +1173,10 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
 			continue;
 
 		entry->addr = ~0ULL;
-		amdgpu_vm_invalidate_level(entry);
+		spin_lock(&vm->status_lock);
+		list_move(&entry->base.vm_status, &vm->relocated);
+		spin_unlock(&vm->status_lock);
+		amdgpu_vm_invalidate_level(vm, entry);
 	}
 }
 
@@ -1197,9 +1194,38 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 {
 	int r;
 
-	r = amdgpu_vm_update_level(adev, vm, &vm->root, 0);
-	if (r)
-		amdgpu_vm_invalidate_level(&vm->root);
+	spin_lock(&vm->status_lock);
+	while (!list_empty(&vm->relocated)) {
+		struct amdgpu_vm_bo_base *bo_base;
+		struct amdgpu_bo *bo;
+
+		bo_base = list_first_entry(&vm->relocated,
+					   struct amdgpu_vm_bo_base,
+					   vm_status);
+		spin_unlock(&vm->status_lock);
+
+		bo = bo_base->bo->parent;
+		if (bo) {
+			struct amdgpu_vm_bo_base *parent;
+			struct amdgpu_vm_pt *pt;
+
+			parent = list_first_entry(&bo->va,
+						  struct amdgpu_vm_bo_base,
+						  bo_list);
+			pt = container_of(parent, struct amdgpu_vm_pt, base);
+
+			r = amdgpu_vm_update_level(adev, vm, pt);
+			if (r) {
+				amdgpu_vm_invalidate_level(vm, &vm->root);
+				return r;
+			}
+			spin_lock(&vm->status_lock);
+		} else {
+			spin_lock(&vm->status_lock);
+			list_del_init(&bo_base->vm_status);
+		}
+	}
+	spin_unlock(&vm->status_lock);
 
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
@@ -1601,7 +1627,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
 	amdgpu_job_free(job);
-	amdgpu_vm_invalidate_level(&vm->root);
+	amdgpu_vm_invalidate_level(vm, &vm->root);
 	return r;
 }
 
@@ -2391,9 +2417,13 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 			continue;
 		}
 
-		/* Don't add page tables to the moved state */
-		if (bo->tbo.type == ttm_bo_type_kernel)
+		if (bo->tbo.type == ttm_bo_type_kernel) {
+			spin_lock(&bo_base->vm->status_lock);
+			if (list_empty(&bo_base->vm_status))
+				list_add(&bo_base->vm_status, &vm->relocated);
+			spin_unlock(&bo_base->vm->status_lock);
 			continue;
+		}
 
 		spin_lock(&bo_base->vm->status_lock);
 		list_move(&bo_base->vm_status, &bo_base->vm->moved);
@@ -2483,6 +2513,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->evicted);
+	INIT_LIST_HEAD(&vm->relocated);
 	INIT_LIST_HEAD(&vm->moved);
 	INIT_LIST_HEAD(&vm->freed);
 