diff options
| author | Christian König <christian.koenig@amd.com> | 2017-08-09 08:15:46 -0400 |
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2017-08-31 13:45:24 -0400 |
| commit | ea09729c930223edf492d0ca647c27e7eb0ccb12 (patch) | |
| tree | 7af83967d9e12cb910cce6177163b9de84f4c4fc /drivers | |
| parent | ebe02de2c60caa3ee5a1b39c7c8b2a40e1fda2d8 (diff) | |
drm/amdgpu: rework page directory filling v2
Keep track of relocated PDs/PTs instead of walking and checking all PDs.
v2: fix root PD handling
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 89 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 |
2 files changed, 63 insertions, 29 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6ff3c1bf035e..faa08d5728da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | |||
| @@ -196,7 +196,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 196 | } | 196 | } |
| 197 | 197 | ||
| 198 | spin_lock(&vm->status_lock); | 198 | spin_lock(&vm->status_lock); |
| 199 | list_del_init(&bo_base->vm_status); | 199 | list_move(&bo_base->vm_status, &vm->relocated); |
| 200 | } | 200 | } |
| 201 | spin_unlock(&vm->status_lock); | 201 | spin_unlock(&vm->status_lock); |
| 202 | 202 | ||
| @@ -314,8 +314,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, | |||
| 314 | entry->base.vm = vm; | 314 | entry->base.vm = vm; |
| 315 | entry->base.bo = pt; | 315 | entry->base.bo = pt; |
| 316 | list_add_tail(&entry->base.bo_list, &pt->va); | 316 | list_add_tail(&entry->base.bo_list, &pt->va); |
| 317 | INIT_LIST_HEAD(&entry->base.vm_status); | 317 | spin_lock(&vm->status_lock); |
| 318 | entry->addr = 0; | 318 | list_add(&entry->base.vm_status, &vm->relocated); |
| 319 | spin_unlock(&vm->status_lock); | ||
| 320 | entry->addr = ~0ULL; | ||
| 319 | } | 321 | } |
| 320 | 322 | ||
| 321 | if (level < adev->vm_manager.num_level) { | 323 | if (level < adev->vm_manager.num_level) { |
| @@ -1000,18 +1002,17 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 1000 | */ | 1002 | */ |
| 1001 | static int amdgpu_vm_update_level(struct amdgpu_device *adev, | 1003 | static int amdgpu_vm_update_level(struct amdgpu_device *adev, |
| 1002 | struct amdgpu_vm *vm, | 1004 | struct amdgpu_vm *vm, |
| 1003 | struct amdgpu_vm_pt *parent, | 1005 | struct amdgpu_vm_pt *parent) |
| 1004 | unsigned level) | ||
| 1005 | { | 1006 | { |
| 1006 | struct amdgpu_bo *shadow; | 1007 | struct amdgpu_bo *shadow; |
| 1007 | struct amdgpu_ring *ring = NULL; | 1008 | struct amdgpu_ring *ring = NULL; |
| 1008 | uint64_t pd_addr, shadow_addr = 0; | 1009 | uint64_t pd_addr, shadow_addr = 0; |
| 1009 | uint32_t incr = amdgpu_vm_bo_size(adev, level + 1); | ||
| 1010 | uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; | 1010 | uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; |
| 1011 | unsigned count = 0, pt_idx, ndw = 0; | 1011 | unsigned count = 0, pt_idx, ndw = 0; |
| 1012 | struct amdgpu_job *job; | 1012 | struct amdgpu_job *job; |
| 1013 | struct amdgpu_pte_update_params params; | 1013 | struct amdgpu_pte_update_params params; |
| 1014 | struct dma_fence *fence = NULL; | 1014 | struct dma_fence *fence = NULL; |
| 1015 | uint32_t incr; | ||
| 1015 | 1016 | ||
| 1016 | int r; | 1017 | int r; |
| 1017 | 1018 | ||
| @@ -1059,12 +1060,17 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
| 1059 | 1060 | ||
| 1060 | /* walk over the address space and update the directory */ | 1061 | /* walk over the address space and update the directory */ |
| 1061 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { | 1062 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { |
| 1062 | struct amdgpu_bo *bo = parent->entries[pt_idx].base.bo; | 1063 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; |
| 1064 | struct amdgpu_bo *bo = entry->base.bo; | ||
| 1063 | uint64_t pde, pt; | 1065 | uint64_t pde, pt; |
| 1064 | 1066 | ||
| 1065 | if (bo == NULL) | 1067 | if (bo == NULL) |
| 1066 | continue; | 1068 | continue; |
| 1067 | 1069 | ||
| 1070 | spin_lock(&vm->status_lock); | ||
| 1071 | list_del_init(&entry->base.vm_status); | ||
| 1072 | spin_unlock(&vm->status_lock); | ||
| 1073 | |||
| 1068 | pt = amdgpu_bo_gpu_offset(bo); | 1074 | pt = amdgpu_bo_gpu_offset(bo); |
| 1069 | pt = amdgpu_gart_get_vm_pde(adev, pt); | 1075 | pt = amdgpu_gart_get_vm_pde(adev, pt); |
| 1070 | /* Don't update huge pages here */ | 1076 | /* Don't update huge pages here */ |
| @@ -1075,6 +1081,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
| 1075 | parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID; | 1081 | parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID; |
| 1076 | 1082 | ||
| 1077 | pde = pd_addr + pt_idx * 8; | 1083 | pde = pd_addr + pt_idx * 8; |
| 1084 | incr = amdgpu_bo_size(bo); | ||
| 1078 | if (((last_pde + 8 * count) != pde) || | 1085 | if (((last_pde + 8 * count) != pde) || |
| 1079 | ((last_pt + incr * count) != pt) || | 1086 | ((last_pt + incr * count) != pt) || |
| 1080 | (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { | 1087 | (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { |
| @@ -1135,20 +1142,6 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
| 1135 | dma_fence_put(fence); | 1142 | dma_fence_put(fence); |
| 1136 | } | 1143 | } |
| 1137 | } | 1144 | } |
| 1138 | /* | ||
| 1139 | * Recurse into the subdirectories. This recursion is harmless because | ||
| 1140 | * we only have a maximum of 5 layers. | ||
| 1141 | */ | ||
| 1142 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { | ||
| 1143 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; | ||
| 1144 | |||
| 1145 | if (!entry->base.bo) | ||
| 1146 | continue; | ||
| 1147 | |||
| 1148 | r = amdgpu_vm_update_level(adev, vm, entry, level + 1); | ||
| 1149 | if (r) | ||
| 1150 | return r; | ||
| 1151 | } | ||
| 1152 | 1145 | ||
| 1153 | return 0; | 1146 | return 0; |
| 1154 | 1147 | ||
| @@ -1164,7 +1157,8 @@ error_free: | |||
| 1164 | * | 1157 | * |
| 1165 | * Mark all PD level as invalid after an error. | 1158 | * Mark all PD level as invalid after an error. |
| 1166 | */ | 1159 | */ |
| 1167 | static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) | 1160 | static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm, |
| 1161 | struct amdgpu_vm_pt *parent) | ||
| 1168 | { | 1162 | { |
| 1169 | unsigned pt_idx; | 1163 | unsigned pt_idx; |
| 1170 | 1164 | ||
| @@ -1179,7 +1173,10 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) | |||
| 1179 | continue; | 1173 | continue; |
| 1180 | 1174 | ||
| 1181 | entry->addr = ~0ULL; | 1175 | entry->addr = ~0ULL; |
| 1182 | amdgpu_vm_invalidate_level(entry); | 1176 | spin_lock(&vm->status_lock); |
| 1177 | list_move(&entry->base.vm_status, &vm->relocated); | ||
| 1178 | spin_unlock(&vm->status_lock); | ||
| 1179 | amdgpu_vm_invalidate_level(vm, entry); | ||
| 1183 | } | 1180 | } |
| 1184 | } | 1181 | } |
| 1185 | 1182 | ||
| @@ -1197,9 +1194,38 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, | |||
| 1197 | { | 1194 | { |
| 1198 | int r; | 1195 | int r; |
| 1199 | 1196 | ||
| 1200 | r = amdgpu_vm_update_level(adev, vm, &vm->root, 0); | 1197 | spin_lock(&vm->status_lock); |
| 1201 | if (r) | 1198 | while (!list_empty(&vm->relocated)) { |
| 1202 | amdgpu_vm_invalidate_level(&vm->root); | 1199 | struct amdgpu_vm_bo_base *bo_base; |
| 1200 | struct amdgpu_bo *bo; | ||
| 1201 | |||
| 1202 | bo_base = list_first_entry(&vm->relocated, | ||
| 1203 | struct amdgpu_vm_bo_base, | ||
| 1204 | vm_status); | ||
| 1205 | spin_unlock(&vm->status_lock); | ||
| 1206 | |||
| 1207 | bo = bo_base->bo->parent; | ||
| 1208 | if (bo) { | ||
| 1209 | struct amdgpu_vm_bo_base *parent; | ||
| 1210 | struct amdgpu_vm_pt *pt; | ||
| 1211 | |||
| 1212 | parent = list_first_entry(&bo->va, | ||
| 1213 | struct amdgpu_vm_bo_base, | ||
| 1214 | bo_list); | ||
| 1215 | pt = container_of(parent, struct amdgpu_vm_pt, base); | ||
| 1216 | |||
| 1217 | r = amdgpu_vm_update_level(adev, vm, pt); | ||
| 1218 | if (r) { | ||
| 1219 | amdgpu_vm_invalidate_level(vm, &vm->root); | ||
| 1220 | return r; | ||
| 1221 | } | ||
| 1222 | spin_lock(&vm->status_lock); | ||
| 1223 | } else { | ||
| 1224 | spin_lock(&vm->status_lock); | ||
| 1225 | list_del_init(&bo_base->vm_status); | ||
| 1226 | } | ||
| 1227 | } | ||
| 1228 | spin_unlock(&vm->status_lock); | ||
| 1203 | 1229 | ||
| 1204 | if (vm->use_cpu_for_update) { | 1230 | if (vm->use_cpu_for_update) { |
| 1205 | /* Flush HDP */ | 1231 | /* Flush HDP */ |
| @@ -1601,7 +1627,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
| 1601 | 1627 | ||
| 1602 | error_free: | 1628 | error_free: |
| 1603 | amdgpu_job_free(job); | 1629 | amdgpu_job_free(job); |
| 1604 | amdgpu_vm_invalidate_level(&vm->root); | 1630 | amdgpu_vm_invalidate_level(vm, &vm->root); |
| 1605 | return r; | 1631 | return r; |
| 1606 | } | 1632 | } |
| 1607 | 1633 | ||
| @@ -2391,9 +2417,13 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, | |||
| 2391 | continue; | 2417 | continue; |
| 2392 | } | 2418 | } |
| 2393 | 2419 | ||
| 2394 | /* Don't add page tables to the moved state */ | 2420 | if (bo->tbo.type == ttm_bo_type_kernel) { |
| 2395 | if (bo->tbo.type == ttm_bo_type_kernel) | 2421 | spin_lock(&bo_base->vm->status_lock); |
| 2422 | if (list_empty(&bo_base->vm_status)) | ||
| 2423 | list_add(&bo_base->vm_status, &vm->relocated); | ||
| 2424 | spin_unlock(&bo_base->vm->status_lock); | ||
| 2396 | continue; | 2425 | continue; |
| 2426 | } | ||
| 2397 | 2427 | ||
| 2398 | spin_lock(&bo_base->vm->status_lock); | 2428 | spin_lock(&bo_base->vm->status_lock); |
| 2399 | list_move(&bo_base->vm_status, &bo_base->vm->moved); | 2429 | list_move(&bo_base->vm_status, &bo_base->vm->moved); |
| @@ -2483,6 +2513,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 2483 | vm->reserved_vmid[i] = NULL; | 2513 | vm->reserved_vmid[i] = NULL; |
| 2484 | spin_lock_init(&vm->status_lock); | 2514 | spin_lock_init(&vm->status_lock); |
| 2485 | INIT_LIST_HEAD(&vm->evicted); | 2515 | INIT_LIST_HEAD(&vm->evicted); |
| 2516 | INIT_LIST_HEAD(&vm->relocated); | ||
| 2486 | INIT_LIST_HEAD(&vm->moved); | 2517 | INIT_LIST_HEAD(&vm->moved); |
| 2487 | INIT_LIST_HEAD(&vm->freed); | 2518 | INIT_LIST_HEAD(&vm->freed); |
| 2488 | 2519 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 4e465e817fe8..c3753afe9853 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | |||
| @@ -129,6 +129,9 @@ struct amdgpu_vm { | |||
| 129 | /* BOs who needs a validation */ | 129 | /* BOs who needs a validation */ |
| 130 | struct list_head evicted; | 130 | struct list_head evicted; |
| 131 | 131 | ||
| 132 | /* PT BOs which relocated and their parent need an update */ | ||
| 133 | struct list_head relocated; | ||
| 134 | |||
| 132 | /* BOs moved, but not yet updated in the PT */ | 135 | /* BOs moved, but not yet updated in the PT */ |
| 133 | struct list_head moved; | 136 | struct list_head moved; |
| 134 | 137 | ||
