path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
author      Christian König <christian.koenig@amd.com>    2017-11-30 13:08:05 -0500
committer   Alex Deucher <alexander.deucher@amd.com>      2017-12-12 14:46:12 -0500
commit      6989f2460f7d8163fcc4f0c99e47d62d22ea6f28 (patch)
tree        98d1059c53b0649316d4eff5a0f6e3e4782ad0f0 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent      78eb2f0c719e6427eb5ac36cebe18df0578421d2 (diff)
drm/amdgpu: batch PDE updates again
Now, instead of one submission for each PDE, batch them together over all PDs that need an update.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    203
1 file changed, 94 insertions(+), 109 deletions(-)
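For context, the change moves job allocation and submission out of amdgpu_vm_update_pde() and into amdgpu_vm_update_directories(), so every page directory entry written during one walk of the relocated list shares a single indirect buffer instead of getting its own submission. Below is a minimal, self-contained C sketch of that batching pattern, for illustration only; the types and helpers (struct pde_write, struct pde_batch, queue_pde(), submit_batch()) are invented stand-ins and not amdgpu APIs.

#include <stdio.h>
#include <stddef.h>

/* Invented stand-ins for one PDE write and for a shared command buffer. */
struct pde_write { unsigned long pde_addr; unsigned long pt_addr; };
struct pde_batch { struct pde_write cmds[512]; size_t count; };

/* Old scheme: every directory entry costs its own submission. */
static void submit_one(struct pde_write w)
{
        printf("submission: PDE 0x%lx -> PT 0x%lx\n", w.pde_addr, w.pt_addr);
}

/* New scheme, step 1: only queue the write into the shared batch. */
static void queue_pde(struct pde_batch *b, unsigned long pde, unsigned long pt)
{
        if (b->count < sizeof(b->cmds) / sizeof(b->cmds[0]))
                b->cmds[b->count++] = (struct pde_write){ pde, pt };
}

/* New scheme, step 2: one submission after all dirty PDs have been walked. */
static void submit_batch(const struct pde_batch *b)
{
        printf("one submission carrying %zu PDE updates\n", b->count);
}

int main(void)
{
        unsigned long pdes[] = { 0x1000, 0x1008, 0x2010 };
        unsigned long pts[]  = { 0xa000, 0xb000, 0xc000 };
        struct pde_batch batch = { .count = 0 };
        size_t i;

        /* Before the patch: three separate submissions. */
        for (i = 0; i < 3; i++)
                submit_one((struct pde_write){ pdes[i], pts[i] });

        /* After the patch: queue everything, then submit once. */
        for (i = 0; i < 3; i++)
                queue_pde(&batch, pdes[i], pts[i]);
        submit_batch(&batch);

        return 0;
}

This is why, in the patch below, amdgpu_vm_update_directories() now owns the job and update-params state, reserves ndw = 512 * 8 dwords up front, and restarts the walk (goto restart) when the buffer runs low, while amdgpu_vm_update_pde() only emits the PDE writes.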
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9e6cf130f832..2482b5dbe31c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1056,118 +1056,46 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 }
 
 /*
- * amdgpu_vm_update_level - update a single level in the hierarchy
+ * amdgpu_vm_update_pde - update a single level in the hierarchy
  *
- * @adev: amdgpu_device pointer
+ * @param: parameters for the update
  * @vm: requested vm
  * @parent: parent directory
+ * @entry: entry to update
  *
- * Makes sure all entries in @parent are up to date.
- * Returns 0 for success, error for failure.
+ * Makes sure the requested entry in parent is up to date.
  */
-static int amdgpu_vm_update_pde(struct amdgpu_device *adev,
+static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
                                 struct amdgpu_vm *vm,
                                 struct amdgpu_vm_pt *parent,
                                 struct amdgpu_vm_pt *entry)
 {
-        struct amdgpu_pte_update_params params;
-        struct amdgpu_bo *bo = entry->base.bo;
-        struct amdgpu_bo *shadow;
-        struct amdgpu_ring *ring = NULL;
+        struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL;
         uint64_t pd_addr, shadow_addr = 0;
-        struct amdgpu_job *job;
-        struct dma_fence *fence = NULL;
-        unsigned ndw = 0;
         uint64_t pde, pt;
 
-        int r;
-
-        if (!parent->entries)
-                return 0;
-
-        memset(&params, 0, sizeof(params));
-        params.adev = adev;
-        shadow = parent->base.bo->shadow;
+        /* Don't update huge pages here */
+        if (entry->huge)
+                return;
 
         if (vm->use_cpu_for_update) {
                 pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-                r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
-                if (unlikely(r))
-                        return r;
-
-                params.func = amdgpu_vm_cpu_set_ptes;
         } else {
-                ring = container_of(vm->entity.sched, struct amdgpu_ring,
-                                    sched);
-
-                /* should be sufficient for two commands plus padding, etc. */
-                ndw = 64;
-
                 pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
+                shadow = parent->base.bo->shadow;
                 if (shadow)
                         shadow_addr = amdgpu_bo_gpu_offset(shadow);
-                else
-                        shadow_addr = 0;
-
-                r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
-                if (r)
-                        return r;
-
-                params.ib = &job->ibs[0];
-                params.func = amdgpu_vm_do_set_ptes;
         }
 
-        spin_lock(&vm->status_lock);
-        list_del_init(&entry->base.vm_status);
-        spin_unlock(&vm->status_lock);
-
         pt = amdgpu_bo_gpu_offset(bo);
-        pt = amdgpu_gart_get_vm_pde(adev, pt);
-        /* Don't update huge pages here */
-        if (entry->huge) {
-                if (!vm->use_cpu_for_update)
-                        amdgpu_job_free(job);
-                return 0;
-        }
-
+        pt = amdgpu_gart_get_vm_pde(params->adev, pt);
         if (shadow) {
                 pde = shadow_addr + (entry - parent->entries) * 8;
-                params.func(&params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
+                params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
         }
 
         pde = pd_addr + (entry - parent->entries) * 8;
-        params.func(&params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
-
-        if (!vm->use_cpu_for_update) {
-                if (params.ib->length_dw == 0) {
-                        amdgpu_job_free(job);
-                } else {
-                        amdgpu_ring_pad_ib(ring, params.ib);
-                        amdgpu_sync_resv(adev, &job->sync,
-                                         parent->base.bo->tbo.resv,
-                                         AMDGPU_FENCE_OWNER_VM, false);
-                        if (shadow)
-                                amdgpu_sync_resv(adev, &job->sync,
-                                                 shadow->tbo.resv,
-                                                 AMDGPU_FENCE_OWNER_VM, false);
-
-                        WARN_ON(params.ib->length_dw > ndw);
-                        r = amdgpu_job_submit(job, ring, &vm->entity,
-                                              AMDGPU_FENCE_OWNER_VM, &fence);
-                        if (r)
-                                goto error_free;
-
-                        amdgpu_bo_fence(parent->base.bo, fence, true);
-                        dma_fence_put(vm->last_update);
-                        vm->last_update = fence;
-                }
-        }
-
-        return 0;
-
-error_free:
-        amdgpu_job_free(job);
-        return r;
+        params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
 }
 
 /*
@@ -1215,41 +1143,63 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
                                  struct amdgpu_vm *vm)
 {
+        struct amdgpu_pte_update_params params;
+        struct amdgpu_job *job;
+        unsigned ndw = 0;
         int r = 0;
 
+        if (list_empty(&vm->relocated))
+                return 0;
+
+restart:
+        memset(&params, 0, sizeof(params));
+        params.adev = adev;
+
+        if (vm->use_cpu_for_update) {
+                r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
+                if (unlikely(r))
+                        return r;
+
+                params.func = amdgpu_vm_cpu_set_ptes;
+        } else {
+                ndw = 512 * 8;
+                r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
+                if (r)
+                        return r;
+
+                params.ib = &job->ibs[0];
+                params.func = amdgpu_vm_do_set_ptes;
+        }
+
         spin_lock(&vm->status_lock);
         while (!list_empty(&vm->relocated)) {
-                struct amdgpu_vm_bo_base *bo_base;
+                struct amdgpu_vm_bo_base *bo_base, *parent;
+                struct amdgpu_vm_pt *pt, *entry;
                 struct amdgpu_bo *bo;
 
                 bo_base = list_first_entry(&vm->relocated,
                                            struct amdgpu_vm_bo_base,
                                            vm_status);
+                list_del_init(&bo_base->vm_status);
                 spin_unlock(&vm->status_lock);
 
                 bo = bo_base->bo->parent;
-                if (bo) {
-                        struct amdgpu_vm_bo_base *parent;
-                        struct amdgpu_vm_pt *pt, *entry;
-
-                        parent = list_first_entry(&bo->va,
-                                                  struct amdgpu_vm_bo_base,
-                                                  bo_list);
-                        pt = container_of(parent, struct amdgpu_vm_pt, base);
-                        entry = container_of(bo_base, struct amdgpu_vm_pt,
-                                             base);
-
-                        r = amdgpu_vm_update_pde(adev, vm, pt, entry);
-                        if (r) {
-                                amdgpu_vm_invalidate_level(adev, vm,
-                                                           &vm->root, 0);
-                                return r;
-                        }
+                if (!bo) {
                         spin_lock(&vm->status_lock);
-                } else {
-                        spin_lock(&vm->status_lock);
-                        list_del_init(&bo_base->vm_status);
+                        continue;
                 }
+
+                parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
+                                          bo_list);
+                pt = container_of(parent, struct amdgpu_vm_pt, base);
+                entry = container_of(bo_base, struct amdgpu_vm_pt, base);
+
+                amdgpu_vm_update_pde(&params, vm, pt, entry);
+
+                spin_lock(&vm->status_lock);
+                if (!vm->use_cpu_for_update &&
+                    (ndw - params.ib->length_dw) < 32)
+                        break;
         }
         spin_unlock(&vm->status_lock);
 
@@ -1257,8 +1207,43 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
                 /* Flush HDP */
                 mb();
                 amdgpu_gart_flush_gpu_tlb(adev, 0);
+        } else if (params.ib->length_dw == 0) {
+                amdgpu_job_free(job);
+        } else {
+                struct amdgpu_bo *root = vm->root.base.bo;
+                struct amdgpu_ring *ring;
+                struct dma_fence *fence;
+
+                ring = container_of(vm->entity.sched, struct amdgpu_ring,
+                                    sched);
+
+                amdgpu_ring_pad_ib(ring, params.ib);
+                amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
+                                 AMDGPU_FENCE_OWNER_VM, false);
+                if (root->shadow)
+                        amdgpu_sync_resv(adev, &job->sync,
+                                         root->shadow->tbo.resv,
+                                         AMDGPU_FENCE_OWNER_VM, false);
+
+                WARN_ON(params.ib->length_dw > ndw);
+                r = amdgpu_job_submit(job, ring, &vm->entity,
+                                      AMDGPU_FENCE_OWNER_VM, &fence);
+                if (r)
+                        goto error;
+
+                amdgpu_bo_fence(root, fence, true);
+                dma_fence_put(vm->last_update);
+                vm->last_update = fence;
         }
 
+        if (!list_empty(&vm->relocated))
+                goto restart;
+
+        return 0;
+
+error:
+        amdgpu_vm_invalidate_level(adev, vm, &vm->root, 0);
+        amdgpu_job_free(job);
         return r;
 }
 