author     Christian König <christian.koenig@amd.com>   2017-11-30 13:08:05 -0500
committer  Alex Deucher <alexander.deucher@amd.com>     2017-12-12 14:46:12 -0500
commit     6989f2460f7d8163fcc4f0c99e47d62d22ea6f28 (patch)
tree       98d1059c53b0649316d4eff5a0f6e3e4782ad0f0 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent     78eb2f0c719e6427eb5ac36cebe18df0578421d2 (diff)
drm/amdgpu: batch PDE updates again
Now, instead of one submission for each PDE, batch the updates together over all
PDs that need an update.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
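
The change is easiest to see in miniature. The following standalone sketch (plain C; submit_job(), the counters and the loop bound are hypothetical stand-ins, not the amdgpu API) contrasts the old scheme of submitting one job per page-directory entry with collecting all pending PDE writes and submitting them as a single job:

#include <stdio.h>

#define NUM_RELOCATED_PDES 5	/* arbitrary number of PDs needing an update */

/* Hypothetical stand-in for building an IB and handing it to the ring. */
static void submit_job(int num_pde_writes)
{
	printf("submit job carrying %d PDE write(s)\n", num_pde_writes);
}

int main(void)
{
	int i, batched = 0;

	/* Old scheme: one job submission per page-directory entry. */
	for (i = 0; i < NUM_RELOCATED_PDES; i++)
		submit_job(1);

	/*
	 * New scheme: queue the PDE write of every PD that needs an update
	 * into one shared command buffer, then submit only once.
	 */
	for (i = 0; i < NUM_RELOCATED_PDES; i++)
		batched++;
	submit_job(batched);

	return 0;
}

In the patch below this corresponds to amdgpu_vm_update_pde() only queueing writes through params->func(), while amdgpu_vm_update_directories() allocates a single IB, walks vm->relocated, and submits once (restarting if the IB fills up).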
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 203
1 file changed, 94 insertions, 109 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9e6cf130f832..2482b5dbe31c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1056,118 +1056,46 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 }
 
 /*
- * amdgpu_vm_update_level - update a single level in the hierarchy
+ * amdgpu_vm_update_pde - update a single level in the hierarchy
  *
- * @adev: amdgpu_device pointer
+ * @param: parameters for the update
  * @vm: requested vm
  * @parent: parent directory
+ * @entry: entry to update
  *
- * Makes sure all entries in @parent are up to date.
- * Returns 0 for success, error for failure.
+ * Makes sure the requested entry in parent is up to date.
  */
-static int amdgpu_vm_update_pde(struct amdgpu_device *adev,
+static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
 				struct amdgpu_vm *vm,
 				struct amdgpu_vm_pt *parent,
 				struct amdgpu_vm_pt *entry)
 {
-	struct amdgpu_pte_update_params params;
-	struct amdgpu_bo *bo = entry->base.bo;
-	struct amdgpu_bo *shadow;
-	struct amdgpu_ring *ring = NULL;
+	struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL;
 	uint64_t pd_addr, shadow_addr = 0;
-	struct amdgpu_job *job;
-	struct dma_fence *fence = NULL;
-	unsigned ndw = 0;
 	uint64_t pde, pt;
 
-	int r;
-
-	if (!parent->entries)
-		return 0;
-
-	memset(&params, 0, sizeof(params));
-	params.adev = adev;
-	shadow = parent->base.bo->shadow;
+	/* Don't update huge pages here */
+	if (entry->huge)
+		return;
 
 	if (vm->use_cpu_for_update) {
 		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
-		if (unlikely(r))
-			return r;
-
-		params.func = amdgpu_vm_cpu_set_ptes;
 	} else {
-		ring = container_of(vm->entity.sched, struct amdgpu_ring,
-				    sched);
-
-		/* should be sufficient for two commands plus padding, etc. */
-		ndw = 64;
-
 		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
+		shadow = parent->base.bo->shadow;
 		if (shadow)
 			shadow_addr = amdgpu_bo_gpu_offset(shadow);
-		else
-			shadow_addr = 0;
-
-		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
-		if (r)
-			return r;
-
-		params.ib = &job->ibs[0];
-		params.func = amdgpu_vm_do_set_ptes;
 	}
 
-	spin_lock(&vm->status_lock);
-	list_del_init(&entry->base.vm_status);
-	spin_unlock(&vm->status_lock);
-
 	pt = amdgpu_bo_gpu_offset(bo);
-	pt = amdgpu_gart_get_vm_pde(adev, pt);
-	/* Don't update huge pages here */
-	if (entry->huge) {
-		if (!vm->use_cpu_for_update)
-			amdgpu_job_free(job);
-		return 0;
-	}
-
+	pt = amdgpu_gart_get_vm_pde(params->adev, pt);
 	if (shadow) {
 		pde = shadow_addr + (entry - parent->entries) * 8;
-		params.func(&params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
+		params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
 	}
 
 	pde = pd_addr + (entry - parent->entries) * 8;
-	params.func(&params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
-
-	if (!vm->use_cpu_for_update) {
-		if (params.ib->length_dw == 0) {
-			amdgpu_job_free(job);
-		} else {
-			amdgpu_ring_pad_ib(ring, params.ib);
-			amdgpu_sync_resv(adev, &job->sync,
-					 parent->base.bo->tbo.resv,
-					 AMDGPU_FENCE_OWNER_VM, false);
-			if (shadow)
-				amdgpu_sync_resv(adev, &job->sync,
-						 shadow->tbo.resv,
-						 AMDGPU_FENCE_OWNER_VM, false);
-
-			WARN_ON(params.ib->length_dw > ndw);
-			r = amdgpu_job_submit(job, ring, &vm->entity,
-					      AMDGPU_FENCE_OWNER_VM, &fence);
-			if (r)
-				goto error_free;
-
-			amdgpu_bo_fence(parent->base.bo, fence, true);
-			dma_fence_put(vm->last_update);
-			vm->last_update = fence;
-		}
-	}
-
-	return 0;
-
-error_free:
-	amdgpu_job_free(job);
-	return r;
+	params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
 }
 
 /*
@@ -1215,41 +1143,63 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm)
 {
+	struct amdgpu_pte_update_params params;
+	struct amdgpu_job *job;
+	unsigned ndw = 0;
 	int r = 0;
 
+	if (list_empty(&vm->relocated))
+		return 0;
+
+restart:
+	memset(&params, 0, sizeof(params));
+	params.adev = adev;
+
+	if (vm->use_cpu_for_update) {
+		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
+		if (unlikely(r))
+			return r;
+
+		params.func = amdgpu_vm_cpu_set_ptes;
+	} else {
+		ndw = 512 * 8;
+		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
+		if (r)
+			return r;
+
+		params.ib = &job->ibs[0];
+		params.func = amdgpu_vm_do_set_ptes;
+	}
+
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->relocated)) {
-		struct amdgpu_vm_bo_base *bo_base;
+		struct amdgpu_vm_bo_base *bo_base, *parent;
+		struct amdgpu_vm_pt *pt, *entry;
 		struct amdgpu_bo *bo;
 
 		bo_base = list_first_entry(&vm->relocated,
 					   struct amdgpu_vm_bo_base,
 					   vm_status);
+		list_del_init(&bo_base->vm_status);
 		spin_unlock(&vm->status_lock);
 
 		bo = bo_base->bo->parent;
-		if (bo) {
-			struct amdgpu_vm_bo_base *parent;
-			struct amdgpu_vm_pt *pt, *entry;
-
-			parent = list_first_entry(&bo->va,
-						  struct amdgpu_vm_bo_base,
-						  bo_list);
-			pt = container_of(parent, struct amdgpu_vm_pt, base);
-			entry = container_of(bo_base, struct amdgpu_vm_pt,
-					     base);
-
-			r = amdgpu_vm_update_pde(adev, vm, pt, entry);
-			if (r) {
-				amdgpu_vm_invalidate_level(adev, vm,
-							   &vm->root, 0);
-				return r;
-			}
+		if (!bo) {
 			spin_lock(&vm->status_lock);
-		} else {
-			spin_lock(&vm->status_lock);
-			list_del_init(&bo_base->vm_status);
+			continue;
 		}
+
+		parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
+					  bo_list);
+		pt = container_of(parent, struct amdgpu_vm_pt, base);
+		entry = container_of(bo_base, struct amdgpu_vm_pt, base);
+
+		amdgpu_vm_update_pde(&params, vm, pt, entry);
+
+		spin_lock(&vm->status_lock);
+		if (!vm->use_cpu_for_update &&
+		    (ndw - params.ib->length_dw) < 32)
+			break;
 	}
 	spin_unlock(&vm->status_lock);
 
@@ -1257,8 +1207,43 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 		/* Flush HDP */
 		mb();
 		amdgpu_gart_flush_gpu_tlb(adev, 0);
+	} else if (params.ib->length_dw == 0) {
+		amdgpu_job_free(job);
+	} else {
+		struct amdgpu_bo *root = vm->root.base.bo;
+		struct amdgpu_ring *ring;
+		struct dma_fence *fence;
+
+		ring = container_of(vm->entity.sched, struct amdgpu_ring,
+				    sched);
+
+		amdgpu_ring_pad_ib(ring, params.ib);
+		amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
+				 AMDGPU_FENCE_OWNER_VM, false);
+		if (root->shadow)
+			amdgpu_sync_resv(adev, &job->sync,
+					 root->shadow->tbo.resv,
+					 AMDGPU_FENCE_OWNER_VM, false);
+
+		WARN_ON(params.ib->length_dw > ndw);
+		r = amdgpu_job_submit(job, ring, &vm->entity,
+				      AMDGPU_FENCE_OWNER_VM, &fence);
+		if (r)
+			goto error;
+
+		amdgpu_bo_fence(root, fence, true);
+		dma_fence_put(vm->last_update);
+		vm->last_update = fence;
 	}
 
+	if (!list_empty(&vm->relocated))
+		goto restart;
+
+	return 0;
+
+error:
+	amdgpu_vm_invalidate_level(adev, vm, &vm->root, 0);
+	amdgpu_job_free(job);
 	return r;
 }
 