author     Christian König <christian.koenig@amd.com>    2017-06-30 05:05:54 -0400
committer  Alex Deucher <alexander.deucher@amd.com>      2017-07-14 11:06:21 -0400
commit     abca90f1c8103528ca4b194fdc69e933bd23db4c
tree       3c020449c92270995b55e41fd4e81ad8d0101fc5 /drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
parent     0c2c421e2657da6eece66bd22eaaedf21dcebef7
drm/amdgpu: use the GTT windows for BO moves v2
This way we don't need to map the full BO at a time any more.
v2: use fixed windows for src/dst
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
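
The two windows are fixed slots at the very start of the GART: window 0 is reused for the source of every blit and window 1 for the destination, and the new adev->mman.gtt_window_lock serializes their use. The following is a minimal user-space sketch, not the kernel code, of the address math amdgpu_map_buffer() uses in this patch to place a window; AMDGPU_GTT_MAX_TRANSFER_SIZE = 512 pages and the gtt_start value are assumptions for illustration only.

    /* Sketch only: mirrors the window address math in amdgpu_map_buffer() below. */
    #include <stdint.h>
    #include <stdio.h>

    #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512     /* pages per window (assumed value) */
    #define AMDGPU_GPU_PAGE_SIZE         4096    /* bytes per GPU page */

    /* GPU address the copy engine uses for a given window (0 = src, 1 = dst). */
    static uint64_t gtt_window_addr(uint64_t gtt_start, unsigned window)
    {
            return gtt_start +
                   (uint64_t)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE;
    }

    /* Byte offset of the window's PTEs inside the GART table (8 bytes per entry). */
    static uint64_t gtt_window_pte_offset(unsigned window)
    {
            return (uint64_t)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
    }

    int main(void)
    {
            uint64_t gtt_start = 0x200000;       /* example value, not a real register */

            printf("src window at 0x%llx, dst window at 0x%llx\n",
                   (unsigned long long)gtt_window_addr(gtt_start, 0),
                   (unsigned long long)gtt_window_addr(gtt_start, 1));
            printf("dst window PTEs start %llu bytes into the GART table\n",
                   (unsigned long long)gtt_window_pte_offset(1));
            return 0;
    }

Because amdgpu_move_blit() now caps each copied chunk at AMDGPU_GTT_MAX_TRANSFER_SIZE pages, at most one window's worth of pages per direction ever needs a valid GART mapping, which is what makes binding the whole BO before a move unnecessary.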
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  125
1 file changed, 106 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ace178b393dd..4a34b61d44ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -47,10 +47,15 @@
 
 #define DRM_FILE_PAGE_OFFSET	(0x100000000ULL >> PAGE_SHIFT)
 
+static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
+			     struct ttm_mem_reg *mem, unsigned num_pages,
+			     uint64_t offset, unsigned window,
+			     struct amdgpu_ring *ring,
+			     uint64_t *addr);
+
 static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
 static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
 
-
 /*
  * Global memory.
  */
@@ -97,6 +102,8 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 		goto error_bo;
 	}
 
+	mutex_init(&adev->mman.gtt_window_lock);
+
 	ring = adev->mman.buffer_funcs_ring;
 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
 	r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
@@ -123,6 +130,7 @@ static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
 	if (adev->mman.mem_global_referenced) {
 		amd_sched_entity_fini(adev->mman.entity.sched,
 				      &adev->mman.entity);
+		mutex_destroy(&adev->mman.gtt_window_lock);
 		drm_global_item_unref(&adev->mman.bo_global_ref.ref);
 		drm_global_item_unref(&adev->mman.mem_global_ref);
 		adev->mman.mem_global_referenced = false;
@@ -256,10 +264,13 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 				    struct drm_mm_node *mm_node,
 				    struct ttm_mem_reg *mem)
 {
-	uint64_t addr;
+	uint64_t addr = 0;
 
-	addr = mm_node->start << PAGE_SHIFT;
-	addr += bo->bdev->man[mem->mem_type].gpu_offset;
+	if (mem->mem_type != TTM_PL_TT ||
+	    amdgpu_gtt_mgr_is_allocated(mem)) {
+		addr = mm_node->start << PAGE_SHIFT;
+		addr += bo->bdev->man[mem->mem_type].gpu_offset;
+	}
 	return addr;
 }
 
@@ -284,34 +295,41 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 		return -EINVAL;
 	}
 
-	if (old_mem->mem_type == TTM_PL_TT) {
-		r = amdgpu_ttm_bind(bo, old_mem);
-		if (r)
-			return r;
-	}
-
 	old_mm = old_mem->mm_node;
 	old_size = old_mm->size;
 	old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
 
-	if (new_mem->mem_type == TTM_PL_TT) {
-		r = amdgpu_ttm_bind(bo, new_mem);
-		if (r)
-			return r;
-	}
-
 	new_mm = new_mem->mm_node;
 	new_size = new_mm->size;
 	new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
 
 	num_pages = new_mem->num_pages;
+	mutex_lock(&adev->mman.gtt_window_lock);
 	while (num_pages) {
-		unsigned long cur_pages = min(old_size, new_size);
+		unsigned long cur_pages = min(min(old_size, new_size),
+					      (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
+		uint64_t from = old_start, to = new_start;
 		struct dma_fence *next;
 
-		r = amdgpu_copy_buffer(ring, old_start, new_start,
+		if (old_mem->mem_type == TTM_PL_TT &&
+		    !amdgpu_gtt_mgr_is_allocated(old_mem)) {
+			r = amdgpu_map_buffer(bo, old_mem, cur_pages,
+					      old_start, 0, ring, &from);
+			if (r)
+				goto error;
+		}
+
+		if (new_mem->mem_type == TTM_PL_TT &&
+		    !amdgpu_gtt_mgr_is_allocated(new_mem)) {
+			r = amdgpu_map_buffer(bo, new_mem, cur_pages,
+					      new_start, 1, ring, &to);
+			if (r)
+				goto error;
+		}
+
+		r = amdgpu_copy_buffer(ring, from, to,
 				       cur_pages * PAGE_SIZE,
-				       bo->resv, &next, false, false);
+				       bo->resv, &next, false, true);
 		if (r)
 			goto error;
 
@@ -338,12 +356,15 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 			new_start += cur_pages * PAGE_SIZE;
 		}
 	}
+	mutex_unlock(&adev->mman.gtt_window_lock);
 
 	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
 	dma_fence_put(fence);
 	return r;
 
 error:
+	mutex_unlock(&adev->mman.gtt_window_lock);
+
 	if (fence)
 		dma_fence_wait(fence, false);
 	dma_fence_put(fence);
@@ -1253,6 +1274,72 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
 	return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
 }
 
+static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
+			     struct ttm_mem_reg *mem, unsigned num_pages,
+			     uint64_t offset, unsigned window,
+			     struct amdgpu_ring *ring,
+			     uint64_t *addr)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
+	struct amdgpu_device *adev = ring->adev;
+	struct ttm_tt *ttm = bo->ttm;
+	struct amdgpu_job *job;
+	unsigned num_dw, num_bytes;
+	dma_addr_t *dma_address;
+	struct dma_fence *fence;
+	uint64_t src_addr, dst_addr;
+	uint64_t flags;
+	int r;
+
+	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
+	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
+
+	*addr = adev->mc.gtt_start;
+	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
+		AMDGPU_GPU_PAGE_SIZE;
+
+	num_dw = adev->mman.buffer_funcs->copy_num_dw;
+	while (num_dw & 0x7)
+		num_dw++;
+
+	num_bytes = num_pages * 8;
+
+	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
+	if (r)
+		return r;
+
+	src_addr = num_dw * 4;
+	src_addr += job->ibs[0].gpu_addr;
+
+	dst_addr = adev->gart.table_addr;
+	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
+	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+				dst_addr, num_bytes);
+
+	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+	WARN_ON(job->ibs[0].length_dw > num_dw);
+
+	dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT];
+	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem);
+	r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
+			    &job->ibs[0].ptr[num_dw]);
+	if (r)
+		goto error_free;
+
+	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+	if (r)
+		goto error_free;
+
+	dma_fence_put(fence);
+
+	return r;
+
+error_free:
+	amdgpu_job_free(job);
+	return r;
+}
+
 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 		       uint64_t dst_offset, uint32_t byte_count,
 		       struct reservation_object *resv,