Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  219
1 file changed, 153 insertions(+), 66 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 15a28578d458..51eacefadea1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -44,6 +44,7 @@
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
 #include "amdgpu.h"
+#include "amdgpu_object.h"
 #include "amdgpu_trace.h"
 #include "bif/bif_4_1_d.h"
 
@@ -209,7 +210,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
                placement->num_busy_placement = 1;
                return;
        }
-       abo = container_of(bo, struct amdgpu_bo, tbo);
+       abo = ttm_to_amdgpu_bo(bo);
        switch (bo->mem.mem_type) {
        case TTM_PL_VRAM:
                if (adev->mman.buffer_funcs &&
@@ -257,7 +258,7 @@ gtt:
 
 static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
-       struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+       struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 
        if (amdgpu_ttm_tt_get_usermm(bo->ttm))
                return -EPERM;
@@ -289,97 +290,177 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
        return addr;
 }
 
-static int amdgpu_move_blit(struct ttm_buffer_object *bo,
-                           bool evict, bool no_wait_gpu,
-                           struct ttm_mem_reg *new_mem,
-                           struct ttm_mem_reg *old_mem)
+/**
+ * amdgpu_find_mm_node - Helper function finds the drm_mm_node
+ * corresponding to @offset. It also modifies the offset to be
+ * within the drm_mm_node returned
+ */
+static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
+                                              unsigned long *offset)
 {
-       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
-       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+       struct drm_mm_node *mm_node = mem->mm_node;
 
-       struct drm_mm_node *old_mm, *new_mm;
-       uint64_t old_start, old_size, new_start, new_size;
-       unsigned long num_pages;
-       struct dma_fence *fence = NULL;
-       int r;
+       while (*offset >= (mm_node->size << PAGE_SHIFT)) {
+               *offset -= (mm_node->size << PAGE_SHIFT);
+               ++mm_node;
+       }
+       return mm_node;
+}
 
-       BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
+/**
+ * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
+ *
+ * The function copies @size bytes from {src->mem + src->offset} to
+ * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
+ * move and different for a BO to BO copy.
+ *
+ * @f: Returns the last fence if multiple jobs are submitted.
+ */
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+                              struct amdgpu_copy_mem *src,
+                              struct amdgpu_copy_mem *dst,
+                              uint64_t size,
+                              struct reservation_object *resv,
+                              struct dma_fence **f)
+{
+       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+       struct drm_mm_node *src_mm, *dst_mm;
+       uint64_t src_node_start, dst_node_start, src_node_size,
+                dst_node_size, src_page_offset, dst_page_offset;
+       struct dma_fence *fence = NULL;
+       int r = 0;
+       const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
+                                       AMDGPU_GPU_PAGE_SIZE);
 
        if (!ring->ready) {
                DRM_ERROR("Trying to move memory with ring turned off.\n");
                return -EINVAL;
        }
 
-       old_mm = old_mem->mm_node;
-       old_size = old_mm->size;
-       old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
+       src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
+       src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
+                            src->offset;
+       src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
+       src_page_offset = src_node_start & (PAGE_SIZE - 1);
 
-       new_mm = new_mem->mm_node;
-       new_size = new_mm->size;
-       new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
+       dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
+       dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
+                            dst->offset;
+       dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
+       dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 
-       num_pages = new_mem->num_pages;
        mutex_lock(&adev->mman.gtt_window_lock);
-       while (num_pages) {
-               unsigned long cur_pages = min(min(old_size, new_size),
-                                             (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
-               uint64_t from = old_start, to = new_start;
+
+       while (size) {
+               unsigned long cur_size;
+               uint64_t from = src_node_start, to = dst_node_start;
                struct dma_fence *next;
 
-               if (old_mem->mem_type == TTM_PL_TT &&
-                   !amdgpu_gtt_mgr_is_allocated(old_mem)) {
-                       r = amdgpu_map_buffer(bo, old_mem, cur_pages,
-                                             old_start, 0, ring, &from);
+               /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
+                * begins at an offset, then adjust the size accordingly
+                */
+               cur_size = min3(min(src_node_size, dst_node_size), size,
+                               GTT_MAX_BYTES);
+               if (cur_size + src_page_offset > GTT_MAX_BYTES ||
+                   cur_size + dst_page_offset > GTT_MAX_BYTES)
+                       cur_size -= max(src_page_offset, dst_page_offset);
+
+               /* Map only what needs to be accessed. Map src to window 0 and
+                * dst to window 1
+                */
+               if (src->mem->mem_type == TTM_PL_TT &&
+                   !amdgpu_gtt_mgr_is_allocated(src->mem)) {
+                       r = amdgpu_map_buffer(src->bo, src->mem,
+                                       PFN_UP(cur_size + src_page_offset),
+                                       src_node_start, 0, ring,
+                                       &from);
                        if (r)
                                goto error;
+                       /* Adjust the offset because amdgpu_map_buffer returns
+                        * start of mapped page
+                        */
+                       from += src_page_offset;
                }
 
-               if (new_mem->mem_type == TTM_PL_TT &&
-                   !amdgpu_gtt_mgr_is_allocated(new_mem)) {
-                       r = amdgpu_map_buffer(bo, new_mem, cur_pages,
-                                             new_start, 1, ring, &to);
+               if (dst->mem->mem_type == TTM_PL_TT &&
+                   !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
+                       r = amdgpu_map_buffer(dst->bo, dst->mem,
+                                       PFN_UP(cur_size + dst_page_offset),
+                                       dst_node_start, 1, ring,
+                                       &to);
                        if (r)
                                goto error;
+                       to += dst_page_offset;
                }
 
-               r = amdgpu_copy_buffer(ring, from, to,
-                                      cur_pages * PAGE_SIZE,
-                                      bo->resv, &next, false, true);
+               r = amdgpu_copy_buffer(ring, from, to, cur_size,
+                                      resv, &next, false, true);
                if (r)
                        goto error;
 
                dma_fence_put(fence);
                fence = next;
 
-               num_pages -= cur_pages;
-               if (!num_pages)
+               size -= cur_size;
+               if (!size)
                        break;
 
-               old_size -= cur_pages;
-               if (!old_size) {
-                       old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
-                       old_size = old_mm->size;
+               src_node_size -= cur_size;
+               if (!src_node_size) {
+                       src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
+                                                            src->mem);
+                       src_node_size = (src_mm->size << PAGE_SHIFT);
                } else {
-                       old_start += cur_pages * PAGE_SIZE;
+                       src_node_start += cur_size;
+                       src_page_offset = src_node_start & (PAGE_SIZE - 1);
                }
-
-               new_size -= cur_pages;
-               if (!new_size) {
-                       new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
-                       new_size = new_mm->size;
+               dst_node_size -= cur_size;
+               if (!dst_node_size) {
+                       dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
+                                                            dst->mem);
+                       dst_node_size = (dst_mm->size << PAGE_SHIFT);
                } else {
-                       new_start += cur_pages * PAGE_SIZE;
+                       dst_node_start += cur_size;
+                       dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
                }
        }
+error:
        mutex_unlock(&adev->mman.gtt_window_lock);
+       if (f)
+               *f = dma_fence_get(fence);
+       dma_fence_put(fence);
+       return r;
+}
+
+
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+                           bool evict, bool no_wait_gpu,
+                           struct ttm_mem_reg *new_mem,
+                           struct ttm_mem_reg *old_mem)
+{
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+       struct amdgpu_copy_mem src, dst;
+       struct dma_fence *fence = NULL;
+       int r;
+
+       src.bo = bo;
+       dst.bo = bo;
+       src.mem = old_mem;
+       dst.mem = new_mem;
+       src.offset = 0;
+       dst.offset = 0;
+
+       r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+                                      new_mem->num_pages << PAGE_SHIFT,
+                                      bo->resv, &fence);
+       if (r)
+               goto error;
 
        r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
        dma_fence_put(fence);
        return r;
 
 error:
-       mutex_unlock(&adev->mman.gtt_window_lock);
-
        if (fence)
                dma_fence_wait(fence, false);
        dma_fence_put(fence);
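For context on how the new helper is meant to be driven, here is a minimal sketch of a BO-to-BO copy built only from the amdgpu_ttm_copy_mem_to_mem() signature and the amdgpu_copy_mem fields used in the hunk above; the wrapper name, the choice of reservation object and the synchronous wait at the end are illustrative assumptions, not part of this patch:

/* Illustrative only: copy @size bytes from @src_bo to @dst_bo with the
 * helper introduced above. Assumes both BOs are already reserved by the
 * caller and that waiting synchronously on the last fence is acceptable.
 */
static int example_bo_to_bo_copy(struct amdgpu_device *adev,
                                 struct amdgpu_bo *src_bo, uint64_t src_offset,
                                 struct amdgpu_bo *dst_bo, uint64_t dst_offset,
                                 uint64_t size)
{
        struct amdgpu_copy_mem src, dst;
        struct dma_fence *fence = NULL;
        int r;

        src.bo = &src_bo->tbo;
        src.mem = &src_bo->tbo.mem;
        src.offset = src_offset;

        dst.bo = &dst_bo->tbo;
        dst.mem = &dst_bo->tbo.mem;
        dst.offset = dst_offset;

        r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, size,
                                       dst_bo->tbo.resv, &fence);
        if (r)
                return r;

        /* The helper hands back the last fence of the submitted copy jobs;
         * wait for it before declaring the copy done.
         */
        if (fence)
                r = dma_fence_wait(fence, false);
        dma_fence_put(fence);
        return r;
}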
@@ -484,7 +565,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
        int r;
 
        /* Can't move a pinned BO */
-       abo = container_of(bo, struct amdgpu_bo, tbo);
+       abo = ttm_to_amdgpu_bo(bo);
        if (WARN_ON_ONCE(abo->pin_count > 0))
                return -EINVAL;
 
@@ -582,13 +663,12 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
                                           unsigned long page_offset)
 {
-       struct drm_mm_node *mm = bo->mem.mm_node;
-       uint64_t size = mm->size;
-       uint64_t offset = page_offset;
+       struct drm_mm_node *mm;
+       unsigned long offset = (page_offset << PAGE_SHIFT);
 
-       page_offset = do_div(offset, size);
-       mm += offset;
-       return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
+       mm = amdgpu_find_mm_node(&bo->mem, &offset);
+       return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
+               (offset >> PAGE_SHIFT);
 }
 
 /*
@@ -1142,9 +1222,9 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
                                    unsigned long offset,
                                    void *buf, int len, int write)
 {
-       struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+       struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
        struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
-       struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+       struct drm_mm_node *nodes;
        uint32_t value = 0;
        int ret = 0;
        uint64_t pos;
@@ -1153,10 +1233,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
        if (bo->mem.mem_type != TTM_PL_VRAM)
                return -EIO;
 
-       while (offset >= (nodes->size << PAGE_SHIFT)) {
-               offset -= nodes->size << PAGE_SHIFT;
-               ++nodes;
-       }
+       nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
        pos = (nodes->start << PAGE_SHIFT) + offset;
 
        while (len && pos < adev->mc.mc_vram_size) {
@@ -1255,6 +1332,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
        /* Change the size here instead of the init above so only lpfn is affected */
        amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
 
+       /*
+        *The reserved vram for firmware must be pinned to the specified
+        *place on the VRAM, so reserve it early.
+        */
+       r = amdgpu_fw_reserve_vram_init(adev);
+       if (r) {
+               return r;
+       }
+
        r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE,
                                    AMDGPU_GEM_DOMAIN_VRAM,
                                    &adev->stolen_vga_memory,
@@ -1479,7 +1565,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
        job->vm_needs_flush = vm_needs_flush;
        if (resv) {
                r = amdgpu_sync_resv(adev, &job->sync, resv,
-                                    AMDGPU_FENCE_OWNER_UNDEFINED);
+                                    AMDGPU_FENCE_OWNER_UNDEFINED,
+                                    false);
                if (r) {
                        DRM_ERROR("sync failed (%d).\n", r);
                        goto error_free;
@@ -1571,7 +1658,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 
        if (resv) {
                r = amdgpu_sync_resv(adev, &job->sync, resv,
-                                    AMDGPU_FENCE_OWNER_UNDEFINED);
+                                    AMDGPU_FENCE_OWNER_UNDEFINED, false);
                if (r) {
                        DRM_ERROR("sync failed (%d).\n", r);
                        goto error_free;
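As a closing aside, the node walk that amdgpu_find_mm_node() centralizes (and that amdgpu_ttm_io_mem_pfn() and amdgpu_ttm_access_memory() now reuse instead of open-coding) can be illustrated with a small self-contained mock; the struct, constants and values below are invented for the illustration and are not driver code:

/* Standalone illustration of the drm_mm_node walk performed by
 * amdgpu_find_mm_node(): subtract whole node sizes from the offset
 * until the offset falls inside a node. Types and values are mocked.
 */
#include <stdio.h>

#define MOCK_PAGE_SHIFT 12

struct mock_node {
        unsigned long start;    /* first backing page of the node */
        unsigned long size;     /* node size in pages */
};

static struct mock_node *find_node(struct mock_node *node,
                                   unsigned long *offset)
{
        while (*offset >= (node->size << MOCK_PAGE_SHIFT)) {
                *offset -= node->size << MOCK_PAGE_SHIFT;
                ++node;
        }
        return node;
}

int main(void)
{
        /* A BO scattered over three nodes of 4, 2 and 8 pages. */
        struct mock_node nodes[] = {
                { .start = 0x100, .size = 4 },
                { .start = 0x300, .size = 2 },
                { .start = 0x800, .size = 8 },
        };
        unsigned long offset = 5 << MOCK_PAGE_SHIFT;    /* page 5 of the BO */
        struct mock_node *n = find_node(nodes, &offset);

        /* Prints the backing page: node start 0x300 + page 1 = 0x301. */
        printf("page 0x%lx, offset in node 0x%lx\n",
               n->start + (offset >> MOCK_PAGE_SHIFT), offset);
        return 0;
}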