author     Christian König <christian.koenig@amd.com>   2017-06-30 05:05:54 -0400
committer  Alex Deucher <alexander.deucher@amd.com>      2017-07-14 11:06:21 -0400
commit     abca90f1c8103528ca4b194fdc69e933bd23db4c (patch)
tree       3c020449c92270995b55e41fd4e81ad8d0101fc5 /drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
parent     0c2c421e2657da6eece66bd22eaaedf21dcebef7 (diff)
drm/amdgpu: use the GTT windows for BO moves v2
This way we don't need to map the full BO at a time any more.

v2: use fixed windows for src/dst

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 125
1 file changed, 106 insertions(+), 19 deletions(-)
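For readers skimming the diff below: the core idea is that a BO move no longer has to bind the whole buffer into the GTT up front. The copy loop walks the buffer in chunks of at most AMDGPU_GTT_MAX_TRANSFER_SIZE pages and, for memory that is not yet allocated in the GTT, maps only the current chunk into one of two fixed GTT windows (window 0 for the source, window 1 for the destination) before issuing the SDMA copy. The following is a minimal user-space sketch of that chunking scheme, not kernel code; the constants and helpers (WINDOW_PAGES, map_pages_into_window(), windowed_move(), the base addresses) are hypothetical stand-ins.

/*
 * Standalone sketch (not kernel code): split a buffer move into chunks of at
 * most WINDOW_PAGES pages, "mapping" each chunk into one of two fixed GTT
 * windows (0 = source, 1 = destination) before copying.  All names, sizes and
 * addresses here are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE_BYTES 4096ULL
#define WINDOW_PAGES    512ULL   /* stands in for AMDGPU_GTT_MAX_TRANSFER_SIZE */

/* Hypothetical stand-in for amdgpu_map_buffer(): returns the GPU address of
 * the chosen window after the chunk's pages have been mapped into it. */
static uint64_t map_pages_into_window(unsigned window, uint64_t src_page)
{
        uint64_t gtt_window_start = 0x100000000ULL;   /* made-up window base */

        (void)src_page;                               /* page-table update elided */
        return gtt_window_start + window * WINDOW_PAGES * PAGE_SIZE_BYTES;
}

static void windowed_move(uint64_t old_start, uint64_t new_start,
                          uint64_t num_pages,
                          int old_needs_window, int new_needs_window)
{
        while (num_pages) {
                uint64_t cur_pages = num_pages < WINDOW_PAGES ? num_pages
                                                              : WINDOW_PAGES;
                uint64_t from = old_start, to = new_start;

                /* Only memory that is not yet bound in the GTT goes through a window. */
                if (old_needs_window)
                        from = map_pages_into_window(0, old_start);
                if (new_needs_window)
                        to = map_pages_into_window(1, new_start);

                printf("copy %llu pages: 0x%llx -> 0x%llx\n",
                       (unsigned long long)cur_pages,
                       (unsigned long long)from, (unsigned long long)to);

                old_start += cur_pages * PAGE_SIZE_BYTES;
                new_start += cur_pages * PAGE_SIZE_BYTES;
                num_pages -= cur_pages;
        }
}

int main(void)
{
        /* Move a 3000-page BO whose destination is not yet bound in the GTT. */
        windowed_move(0x200000000ULL, 0x300000000ULL, 3000, 0, 1);
        return 0;
}

In the actual patch the two windows are shared per device, so their use is serialized with the new adev->mman.gtt_window_lock mutex, and the per-chunk mapping is done by the new amdgpu_map_buffer() helper added at the end of the diff.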
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ace178b393dd..4a34b61d44ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -47,10 +47,15 @@
 
 #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
 
+static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
+                             struct ttm_mem_reg *mem, unsigned num_pages,
+                             uint64_t offset, unsigned window,
+                             struct amdgpu_ring *ring,
+                             uint64_t *addr);
+
 static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
 static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
 
-
 /*
  * Global memory.
  */
@@ -97,6 +102,8 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
                 goto error_bo;
         }
 
+        mutex_init(&adev->mman.gtt_window_lock);
+
         ring = adev->mman.buffer_funcs_ring;
         rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
         r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
@@ -123,6 +130,7 @@ static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
         if (adev->mman.mem_global_referenced) {
                 amd_sched_entity_fini(adev->mman.entity.sched,
                                       &adev->mman.entity);
+                mutex_destroy(&adev->mman.gtt_window_lock);
                 drm_global_item_unref(&adev->mman.bo_global_ref.ref);
                 drm_global_item_unref(&adev->mman.mem_global_ref);
                 adev->mman.mem_global_referenced = false;
@@ -256,10 +264,13 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
                                     struct drm_mm_node *mm_node,
                                     struct ttm_mem_reg *mem)
 {
-        uint64_t addr;
+        uint64_t addr = 0;
 
-        addr = mm_node->start << PAGE_SHIFT;
-        addr += bo->bdev->man[mem->mem_type].gpu_offset;
+        if (mem->mem_type != TTM_PL_TT ||
+            amdgpu_gtt_mgr_is_allocated(mem)) {
+                addr = mm_node->start << PAGE_SHIFT;
+                addr += bo->bdev->man[mem->mem_type].gpu_offset;
+        }
         return addr;
 }
 
@@ -284,34 +295,41 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
                 return -EINVAL;
         }
 
-        if (old_mem->mem_type == TTM_PL_TT) {
-                r = amdgpu_ttm_bind(bo, old_mem);
-                if (r)
-                        return r;
-        }
-
         old_mm = old_mem->mm_node;
         old_size = old_mm->size;
         old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
 
-        if (new_mem->mem_type == TTM_PL_TT) {
-                r = amdgpu_ttm_bind(bo, new_mem);
-                if (r)
-                        return r;
-        }
-
         new_mm = new_mem->mm_node;
         new_size = new_mm->size;
         new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
 
         num_pages = new_mem->num_pages;
+        mutex_lock(&adev->mman.gtt_window_lock);
         while (num_pages) {
-                unsigned long cur_pages = min(old_size, new_size);
+                unsigned long cur_pages = min(min(old_size, new_size),
+                                              (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
+                uint64_t from = old_start, to = new_start;
                 struct dma_fence *next;
 
-                r = amdgpu_copy_buffer(ring, old_start, new_start,
+                if (old_mem->mem_type == TTM_PL_TT &&
+                    !amdgpu_gtt_mgr_is_allocated(old_mem)) {
+                        r = amdgpu_map_buffer(bo, old_mem, cur_pages,
+                                              old_start, 0, ring, &from);
+                        if (r)
+                                goto error;
+                }
+
+                if (new_mem->mem_type == TTM_PL_TT &&
+                    !amdgpu_gtt_mgr_is_allocated(new_mem)) {
+                        r = amdgpu_map_buffer(bo, new_mem, cur_pages,
+                                              new_start, 1, ring, &to);
+                        if (r)
+                                goto error;
+                }
+
+                r = amdgpu_copy_buffer(ring, from, to,
                                        cur_pages * PAGE_SIZE,
-                                       bo->resv, &next, false, false);
+                                       bo->resv, &next, false, true);
                 if (r)
                         goto error;
 
@@ -338,12 +356,15 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
                         new_start += cur_pages * PAGE_SIZE;
                 }
         }
+        mutex_unlock(&adev->mman.gtt_window_lock);
 
         r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
         dma_fence_put(fence);
         return r;
 
 error:
+        mutex_unlock(&adev->mman.gtt_window_lock);
+
         if (fence)
                 dma_fence_wait(fence, false);
         dma_fence_put(fence);
@@ -1253,6 +1274,72 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
         return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
 }
 
+static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
+                             struct ttm_mem_reg *mem, unsigned num_pages,
+                             uint64_t offset, unsigned window,
+                             struct amdgpu_ring *ring,
+                             uint64_t *addr)
+{
+        struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
+        struct amdgpu_device *adev = ring->adev;
+        struct ttm_tt *ttm = bo->ttm;
+        struct amdgpu_job *job;
+        unsigned num_dw, num_bytes;
+        dma_addr_t *dma_address;
+        struct dma_fence *fence;
+        uint64_t src_addr, dst_addr;
+        uint64_t flags;
+        int r;
+
+        BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
+               AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
+
+        *addr = adev->mc.gtt_start;
+        *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
+                AMDGPU_GPU_PAGE_SIZE;
+
+        num_dw = adev->mman.buffer_funcs->copy_num_dw;
+        while (num_dw & 0x7)
+                num_dw++;
+
+        num_bytes = num_pages * 8;
+
+        r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
+        if (r)
+                return r;
+
+        src_addr = num_dw * 4;
+        src_addr += job->ibs[0].gpu_addr;
+
+        dst_addr = adev->gart.table_addr;
+        dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
+        amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+                                dst_addr, num_bytes);
+
+        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+        WARN_ON(job->ibs[0].length_dw > num_dw);
+
+        dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT];
+        flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem);
+        r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
+                            &job->ibs[0].ptr[num_dw]);
+        if (r)
+                goto error_free;
+
+        r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+                              AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+        if (r)
+                goto error_free;
+
+        dma_fence_put(fence);
+
+        return r;
+
+error_free:
+        amdgpu_job_free(job);
+        return r;
+}
+
 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
                        uint64_t dst_offset, uint32_t byte_count,
                        struct reservation_object *resv,