path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
author:    Christian König <christian.koenig@amd.com>	2018-01-24 11:19:04 -0500
committer: Alex Deucher <alexander.deucher@amd.com>	2018-02-19 14:18:53 -0500
commit:    13307f7e1d0c05a68f4ba19193cbd213573a8680 (patch)
tree:      321db2669540ea25d35df982b83e98d1bf73c2d0 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent:    4383736340f01f10e6cc8a80c3374bf38eaf7470 (diff)
drm/amdgpu: revert "drm/amdgpu: use AMDGPU_GEM_CREATE_VRAM_CLEARED for VM PD/PTs" v2
Using the standard clear turned out to be too inflexible. First of all, it is
executed on the system queue together with buffer moves, instead of on the
per-VM queue. Second, we need to fill the page tables with more than just
zeroes.

We keep the new functionality of initializing the PDEs/PTEs with ATC routing
entries intact.

v2: update commit message.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
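In practice this means PD/PT clearing now goes through a dedicated helper that
is submitted on the VM's own scheduler entity, with a fill value that depends
on ATS support instead of always being zero. A condensed sketch of that
selection logic, pulled from the amdgpu_vm_clear_bo() helper added in the diff
below (job setup and error handling omitted):

	uint64_t init_value;

	/* On ASICs with ATS support (pte_support_ats, e.g. Raven) the
	 * PDs/PTs are seeded with the default ATC routing entry; page
	 * directories additionally get the PDE_PTE bit. Otherwise the
	 * tables are simply zero-filled as before.
	 */
	if (vm->pte_support_ats) {
		init_value = AMDGPU_PTE_DEFAULT_ATC;
		if (level != AMDGPU_VM_PTB)
			init_value |= AMDGPU_PDE_PTE;
	} else {
		init_value = 0;
	}

	/* The whole BO is then filled with init_value via a single SDMA IB
	 * scheduled on vm->entity, i.e. the per-VM queue rather than the
	 * system queue used for buffer moves. */
	amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, entries, 0, init_value);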
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 116
1 file changed, 92 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0df52cb1765b..5cdd8d9c3311 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -258,6 +258,74 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 }
 
 /**
+ * amdgpu_vm_clear_bo - initially clear the PDs/PTs
+ *
+ * @adev: amdgpu_device pointer
+ * @bo: BO to clear
+ * @level: level this BO is at
+ *
+ * Root PD needs to be reserved when calling this.
+ */
+static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
+			      struct amdgpu_vm *vm,
+			      struct amdgpu_bo *bo,
+			      unsigned level)
+{
+	struct ttm_operation_ctx ctx = { true, false };
+	struct dma_fence *fence = NULL;
+	uint64_t addr, init_value;
+	struct amdgpu_ring *ring;
+	struct amdgpu_job *job;
+	unsigned entries;
+	int r;
+
+	if (vm->pte_support_ats) {
+		init_value = AMDGPU_PTE_DEFAULT_ATC;
+		if (level != AMDGPU_VM_PTB)
+			init_value |= AMDGPU_PDE_PTE;
+	} else {
+		init_value = 0;
+	}
+
+	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
+	r = reservation_object_reserve_shared(bo->tbo.resv);
+	if (r)
+		return r;
+
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (r)
+		goto error;
+
+	addr = amdgpu_bo_gpu_offset(bo);
+	entries = amdgpu_bo_size(bo) / 8;
+
+	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+	if (r)
+		goto error;
+
+	amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+			      entries, 0, init_value);
+	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+
+	WARN_ON(job->ibs[0].length_dw > 64);
+	r = amdgpu_job_submit(job, ring, &vm->entity,
+			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+	if (r)
+		goto error_free;
+
+	amdgpu_bo_fence(bo, fence, true);
+	dma_fence_put(fence);
+	return 0;
+
+error_free:
+	amdgpu_job_free(job);
+
+error:
+	return r;
+}
+
+/**
  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
  *
  * @adev: amdgpu_device pointer
@@ -275,9 +343,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 {
 	unsigned shift = amdgpu_vm_level_shift(adev, level);
 	unsigned pt_idx, from, to;
-	int r;
 	u64 flags;
-	uint64_t init_value = 0;
+	int r;
 
 	if (!parent->entries) {
 		unsigned num_entries = amdgpu_vm_num_entries(adev, level);
@@ -300,21 +367,13 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	saddr = saddr & ((1 << shift) - 1);
 	eaddr = eaddr & ((1 << shift) - 1);
 
-	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-			AMDGPU_GEM_CREATE_VRAM_CLEARED;
+	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	if (vm->use_cpu_for_update)
 		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 	else
 		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 				AMDGPU_GEM_CREATE_SHADOW);
 
-	if (vm->pte_support_ats) {
-		init_value = AMDGPU_PTE_DEFAULT_ATC;
-		if (level != AMDGPU_VM_PTB)
-			init_value |= AMDGPU_PDE_PTE;
-
-	}
-
 	/* walk over the address space and allocate the page tables */
 	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
 		struct reservation_object *resv = vm->root.base.bo->tbo.resv;
@@ -325,12 +384,17 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			r = amdgpu_bo_create(adev,
 					     amdgpu_vm_bo_size(adev, level),
 					     AMDGPU_GPU_PAGE_SIZE, true,
-					     AMDGPU_GEM_DOMAIN_VRAM,
-					     flags,
-					     NULL, resv, init_value, &pt);
+					     AMDGPU_GEM_DOMAIN_VRAM, flags,
+					     NULL, resv, 0, &pt);
 			if (r)
 				return r;
 
+			r = amdgpu_vm_clear_bo(adev, vm, pt, level);
+			if (r) {
+				amdgpu_bo_unref(&pt);
+				return r;
+			}
+
 			if (vm->use_cpu_for_update) {
 				r = amdgpu_bo_kmap(pt, NULL);
 				if (r) {
@@ -2241,11 +2305,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 				   AMDGPU_VM_PTE_COUNT(adev) * 8);
-	uint64_t init_pde_value = 0, flags;
 	unsigned ring_instance;
 	struct amdgpu_ring *ring;
 	struct drm_sched_rq *rq;
 	unsigned long size;
+	uint64_t flags;
 	int r, i;
 
 	vm->va = RB_ROOT_CACHED;
@@ -2274,23 +2338,19 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
 					    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
 
-		if (adev->asic_type == CHIP_RAVEN) {
+		if (adev->asic_type == CHIP_RAVEN)
 			vm->pte_support_ats = true;
-			init_pde_value = AMDGPU_PTE_DEFAULT_ATC
-					| AMDGPU_PDE_PTE;
-
-		}
-	} else
+	} else {
 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
 					    AMDGPU_VM_USE_CPU_FOR_GFX);
+	}
 	DRM_DEBUG_DRIVER("VM update mode is %s\n",
 			 vm->use_cpu_for_update ? "CPU" : "SDMA");
 	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
 		  "CPU update of VM recommended only for large BAR system\n");
 	vm->last_update = NULL;
 
-	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-			AMDGPU_GEM_CREATE_VRAM_CLEARED;
+	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	if (vm->use_cpu_for_update)
 		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 	else
@@ -2299,7 +2359,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
 	r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM,
-			     flags, NULL, NULL, init_pde_value,
+			     flags, NULL, NULL, 0,
 			     &vm->root.base.bo);
 	if (r)
 		goto error_free_sched_entity;
@@ -2308,6 +2368,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (r)
 		goto error_free_root;
 
+	r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+			       adev->vm_manager.root_level);
+	if (r)
+		goto error_unreserve;
+
 	vm->root.base.vm = vm;
 	list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
 	list_add_tail(&vm->root.base.vm_status, &vm->evicted);
@@ -2331,6 +2396,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	return 0;
 
+error_unreserve:
+	amdgpu_bo_unreserve(vm->root.base.bo);
+
 error_free_root:
 	amdgpu_bo_unref(&vm->root.base.bo->shadow);
 	amdgpu_bo_unref(&vm->root.base.bo);