Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   219
1 file changed, 153 insertions(+), 66 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 15a28578d458..51eacefadea1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -44,6 +44,7 @@
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
 #include "amdgpu.h"
+#include "amdgpu_object.h"
 #include "amdgpu_trace.h"
 #include "bif/bif_4_1_d.h"
 
@@ -209,7 +210,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
                 placement->num_busy_placement = 1;
                 return;
         }
-        abo = container_of(bo, struct amdgpu_bo, tbo);
+        abo = ttm_to_amdgpu_bo(bo);
         switch (bo->mem.mem_type) {
         case TTM_PL_VRAM:
                 if (adev->mman.buffer_funcs &&
@@ -257,7 +258,7 @@ gtt:
 
 static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
-        struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 
         if (amdgpu_ttm_tt_get_usermm(bo->ttm))
                 return -EPERM;
@@ -289,97 +290,177 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
         return addr;
 }
 
-static int amdgpu_move_blit(struct ttm_buffer_object *bo,
-                        bool evict, bool no_wait_gpu,
-                        struct ttm_mem_reg *new_mem,
-                        struct ttm_mem_reg *old_mem)
+/**
+ * amdgpu_find_mm_node - Helper function finds the drm_mm_node
+ * corresponding to @offset. It also modifies the offset to be
+ * within the drm_mm_node returned
+ */
+static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
+                                               unsigned long *offset)
 {
-        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
-        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+        struct drm_mm_node *mm_node = mem->mm_node;
 
-        struct drm_mm_node *old_mm, *new_mm;
-        uint64_t old_start, old_size, new_start, new_size;
-        unsigned long num_pages;
-        struct dma_fence *fence = NULL;
-        int r;
+        while (*offset >= (mm_node->size << PAGE_SHIFT)) {
+                *offset -= (mm_node->size << PAGE_SHIFT);
+                ++mm_node;
+        }
+        return mm_node;
+}
 
-        BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
+/**
+ * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
+ *
+ * The function copies @size bytes from {src->mem + src->offset} to
+ * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
+ * move and different for a BO to BO copy.
+ *
+ * @f: Returns the last fence if multiple jobs are submitted.
+ */
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+                               struct amdgpu_copy_mem *src,
+                               struct amdgpu_copy_mem *dst,
+                               uint64_t size,
+                               struct reservation_object *resv,
+                               struct dma_fence **f)
+{
+        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+        struct drm_mm_node *src_mm, *dst_mm;
+        uint64_t src_node_start, dst_node_start, src_node_size,
+                 dst_node_size, src_page_offset, dst_page_offset;
+        struct dma_fence *fence = NULL;
+        int r = 0;
+        const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
+                                        AMDGPU_GPU_PAGE_SIZE);
 
         if (!ring->ready) {
                 DRM_ERROR("Trying to move memory with ring turned off.\n");
                 return -EINVAL;
         }
 
-        old_mm = old_mem->mm_node;
-        old_size = old_mm->size;
-        old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
+        src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
+        src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
+                             src->offset;
+        src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
+        src_page_offset = src_node_start & (PAGE_SIZE - 1);
 
-        new_mm = new_mem->mm_node;
-        new_size = new_mm->size;
-        new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
+        dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
+        dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
+                             dst->offset;
+        dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
+        dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 
-        num_pages = new_mem->num_pages;
         mutex_lock(&adev->mman.gtt_window_lock);
-        while (num_pages) {
-                unsigned long cur_pages = min(min(old_size, new_size),
-                                              (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
-                uint64_t from = old_start, to = new_start;
+
+        while (size) {
+                unsigned long cur_size;
+                uint64_t from = src_node_start, to = dst_node_start;
                 struct dma_fence *next;
 
-                if (old_mem->mem_type == TTM_PL_TT &&
-                    !amdgpu_gtt_mgr_is_allocated(old_mem)) {
-                        r = amdgpu_map_buffer(bo, old_mem, cur_pages,
-                                              old_start, 0, ring, &from);
+                /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
+                 * begins at an offset, then adjust the size accordingly
+                 */
+                cur_size = min3(min(src_node_size, dst_node_size), size,
+                                GTT_MAX_BYTES);
+                if (cur_size + src_page_offset > GTT_MAX_BYTES ||
+                    cur_size + dst_page_offset > GTT_MAX_BYTES)
+                        cur_size -= max(src_page_offset, dst_page_offset);
+
+                /* Map only what needs to be accessed. Map src to window 0 and
+                 * dst to window 1
+                 */
+                if (src->mem->mem_type == TTM_PL_TT &&
+                    !amdgpu_gtt_mgr_is_allocated(src->mem)) {
+                        r = amdgpu_map_buffer(src->bo, src->mem,
+                                        PFN_UP(cur_size + src_page_offset),
+                                        src_node_start, 0, ring,
+                                        &from);
                         if (r)
                                 goto error;
+                        /* Adjust the offset because amdgpu_map_buffer returns
+                         * start of mapped page
+                         */
+                        from += src_page_offset;
                 }
 
-                if (new_mem->mem_type == TTM_PL_TT &&
-                    !amdgpu_gtt_mgr_is_allocated(new_mem)) {
-                        r = amdgpu_map_buffer(bo, new_mem, cur_pages,
-                                              new_start, 1, ring, &to);
+                if (dst->mem->mem_type == TTM_PL_TT &&
+                    !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
+                        r = amdgpu_map_buffer(dst->bo, dst->mem,
+                                        PFN_UP(cur_size + dst_page_offset),
+                                        dst_node_start, 1, ring,
+                                        &to);
                         if (r)
                                 goto error;
+                        to += dst_page_offset;
                 }
 
-                r = amdgpu_copy_buffer(ring, from, to,
-                                       cur_pages * PAGE_SIZE,
-                                       bo->resv, &next, false, true);
+                r = amdgpu_copy_buffer(ring, from, to, cur_size,
+                                       resv, &next, false, true);
                 if (r)
                         goto error;
 
                 dma_fence_put(fence);
                 fence = next;
 
-                num_pages -= cur_pages;
-                if (!num_pages)
+                size -= cur_size;
+                if (!size)
                         break;
 
-                old_size -= cur_pages;
-                if (!old_size) {
-                        old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
-                        old_size = old_mm->size;
+                src_node_size -= cur_size;
+                if (!src_node_size) {
+                        src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
+                                                             src->mem);
+                        src_node_size = (src_mm->size << PAGE_SHIFT);
                 } else {
-                        old_start += cur_pages * PAGE_SIZE;
+                        src_node_start += cur_size;
+                        src_page_offset = src_node_start & (PAGE_SIZE - 1);
                 }
-
-                new_size -= cur_pages;
-                if (!new_size) {
-                        new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
-                        new_size = new_mm->size;
+                dst_node_size -= cur_size;
+                if (!dst_node_size) {
+                        dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
+                                                             dst->mem);
+                        dst_node_size = (dst_mm->size << PAGE_SHIFT);
                 } else {
-                        new_start += cur_pages * PAGE_SIZE;
+                        dst_node_start += cur_size;
+                        dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
                 }
         }
+error:
         mutex_unlock(&adev->mman.gtt_window_lock);
+        if (f)
+                *f = dma_fence_get(fence);
+        dma_fence_put(fence);
+        return r;
+}
+
+
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+                            bool evict, bool no_wait_gpu,
+                            struct ttm_mem_reg *new_mem,
+                            struct ttm_mem_reg *old_mem)
+{
+        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+        struct amdgpu_copy_mem src, dst;
+        struct dma_fence *fence = NULL;
+        int r;
+
+        src.bo = bo;
+        dst.bo = bo;
+        src.mem = old_mem;
+        dst.mem = new_mem;
+        src.offset = 0;
+        dst.offset = 0;
+
+        r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+                                       new_mem->num_pages << PAGE_SHIFT,
+                                       bo->resv, &fence);
+        if (r)
+                goto error;
 
         r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
         dma_fence_put(fence);
         return r;
 
 error:
-        mutex_unlock(&adev->mman.gtt_window_lock);
-
         if (fence)
                 dma_fence_wait(fence, false);
         dma_fence_put(fence);
@@ -484,7 +565,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
         int r;
 
         /* Can't move a pinned BO */
-        abo = container_of(bo, struct amdgpu_bo, tbo);
+        abo = ttm_to_amdgpu_bo(bo);
         if (WARN_ON_ONCE(abo->pin_count > 0))
                 return -EINVAL;
 
@@ -582,13 +663,12 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
                                            unsigned long page_offset)
 {
-        struct drm_mm_node *mm = bo->mem.mm_node;
-        uint64_t size = mm->size;
-        uint64_t offset = page_offset;
+        struct drm_mm_node *mm;
+        unsigned long offset = (page_offset << PAGE_SHIFT);
 
-        page_offset = do_div(offset, size);
-        mm += offset;
-        return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
+        mm = amdgpu_find_mm_node(&bo->mem, &offset);
+        return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
+                (offset >> PAGE_SHIFT);
 }
 
 /*
@@ -1142,9 +1222,9 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
                                     unsigned long offset,
                                     void *buf, int len, int write)
 {
-        struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
         struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
-        struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+        struct drm_mm_node *nodes;
         uint32_t value = 0;
         int ret = 0;
         uint64_t pos;
@@ -1153,10 +1233,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
         if (bo->mem.mem_type != TTM_PL_VRAM)
                 return -EIO;
 
-        while (offset >= (nodes->size << PAGE_SHIFT)) {
-                offset -= nodes->size << PAGE_SHIFT;
-                ++nodes;
-        }
+        nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
         pos = (nodes->start << PAGE_SHIFT) + offset;
 
         while (len && pos < adev->mc.mc_vram_size) {
@@ -1255,6 +1332,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
         /* Change the size here instead of the init above so only lpfn is affected */
         amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
 
+        /*
+         *The reserved vram for firmware must be pinned to the specified
+         *place on the VRAM, so reserve it early.
+         */
+        r = amdgpu_fw_reserve_vram_init(adev);
+        if (r) {
+                return r;
+        }
+
         r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE,
                                     AMDGPU_GEM_DOMAIN_VRAM,
                                     &adev->stolen_vga_memory,
@@ -1479,7 +1565,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
         job->vm_needs_flush = vm_needs_flush;
         if (resv) {
                 r = amdgpu_sync_resv(adev, &job->sync, resv,
-                                     AMDGPU_FENCE_OWNER_UNDEFINED);
+                                     AMDGPU_FENCE_OWNER_UNDEFINED,
+                                     false);
                 if (r) {
                         DRM_ERROR("sync failed (%d).\n", r);
                         goto error_free;
@@ -1571,7 +1658,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 
         if (resv) {
                 r = amdgpu_sync_resv(adev, &job->sync, resv,
-                                     AMDGPU_FENCE_OWNER_UNDEFINED);
+                                     AMDGPU_FENCE_OWNER_UNDEFINED, false);
                 if (r) {
                         DRM_ERROR("sync failed (%d).\n", r);
                         goto error_free;
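
For context, a minimal caller sketch of the amdgpu_ttm_copy_mem_to_mem() helper introduced above, filling struct amdgpu_copy_mem the same way amdgpu_move_blit() does in this patch. The wrapper function example_bo_to_bo_copy() and the standalone BO-to-BO scenario are illustrative assumptions and not part of the patch; only the helper's signature and the bo/mem/offset fields are taken from the diff.

/* Illustrative only: a hypothetical caller that copies 'size' bytes
 * between two already-placed BOs and waits for the returned fence.
 */
static int example_bo_to_bo_copy(struct amdgpu_device *adev,
                                 struct amdgpu_bo *src_bo,
                                 struct amdgpu_bo *dst_bo,
                                 uint64_t size)
{
        struct amdgpu_copy_mem src = {
                .bo = &src_bo->tbo,             /* source buffer object */
                .mem = &src_bo->tbo.mem,        /* its current placement */
                .offset = 0,
        };
        struct amdgpu_copy_mem dst = {
                .bo = &dst_bo->tbo,
                .mem = &dst_bo->tbo.mem,
                .offset = 0,
        };
        struct dma_fence *fence = NULL;
        int r;

        r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, size,
                                       src_bo->tbo.resv, &fence);
        if (r)
                return r;

        /* The helper hands back the last fence of the submitted copy
         * jobs; wait on it before reusing the buffers, then drop the ref.
         */
        if (fence) {
                dma_fence_wait(fence, false);
                dma_fence_put(fence);
        }
        return 0;
}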