Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  219
1 file changed, 153 insertions(+), 66 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 15a28578d458..51eacefadea1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -44,6 +44,7 @@
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
 #include "amdgpu.h"
+#include "amdgpu_object.h"
 #include "amdgpu_trace.h"
 #include "bif/bif_4_1_d.h"
 
@@ -209,7 +210,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
                placement->num_busy_placement = 1;
                return;
        }
-       abo = container_of(bo, struct amdgpu_bo, tbo);
+       abo = ttm_to_amdgpu_bo(bo);
        switch (bo->mem.mem_type) {
        case TTM_PL_VRAM:
                if (adev->mman.buffer_funcs &&
@@ -257,7 +258,7 @@ gtt:
 
 static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
-       struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+       struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 
        if (amdgpu_ttm_tt_get_usermm(bo->ttm))
                return -EPERM;
@@ -289,97 +290,177 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
        return addr;
 }
 
-static int amdgpu_move_blit(struct ttm_buffer_object *bo,
-                           bool evict, bool no_wait_gpu,
-                           struct ttm_mem_reg *new_mem,
-                           struct ttm_mem_reg *old_mem)
+/**
+ * amdgpu_find_mm_node - Helper function finds the drm_mm_node
+ * corresponding to @offset. It also modifies the offset to be
+ * within the drm_mm_node returned
+ */
+static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
+                                              unsigned long *offset)
 {
-       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
-       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+       struct drm_mm_node *mm_node = mem->mm_node;
 
-       struct drm_mm_node *old_mm, *new_mm;
-       uint64_t old_start, old_size, new_start, new_size;
-       unsigned long num_pages;
-       struct dma_fence *fence = NULL;
-       int r;
+       while (*offset >= (mm_node->size << PAGE_SHIFT)) {
+               *offset -= (mm_node->size << PAGE_SHIFT);
+               ++mm_node;
+       }
+       return mm_node;
+}
 
-       BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
+/**
+ * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
+ *
+ * The function copies @size bytes from {src->mem + src->offset} to
+ * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
+ * move and different for a BO to BO copy.
+ *
+ * @f: Returns the last fence if multiple jobs are submitted.
+ */
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+                              struct amdgpu_copy_mem *src,
+                              struct amdgpu_copy_mem *dst,
+                              uint64_t size,
+                              struct reservation_object *resv,
+                              struct dma_fence **f)
+{
+       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+       struct drm_mm_node *src_mm, *dst_mm;
+       uint64_t src_node_start, dst_node_start, src_node_size,
+                dst_node_size, src_page_offset, dst_page_offset;
+       struct dma_fence *fence = NULL;
+       int r = 0;
+       const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
+                                       AMDGPU_GPU_PAGE_SIZE);
 
        if (!ring->ready) {
                DRM_ERROR("Trying to move memory with ring turned off.\n");
                return -EINVAL;
        }
 
-       old_mm = old_mem->mm_node;
-       old_size = old_mm->size;
-       old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
+       src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
+       src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
+                            src->offset;
+       src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
+       src_page_offset = src_node_start & (PAGE_SIZE - 1);
 
-       new_mm = new_mem->mm_node;
-       new_size = new_mm->size;
-       new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
+       dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
+       dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
+                            dst->offset;
+       dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
+       dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 
-       num_pages = new_mem->num_pages;
        mutex_lock(&adev->mman.gtt_window_lock);
-       while (num_pages) {
-               unsigned long cur_pages = min(min(old_size, new_size),
-                                             (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
-               uint64_t from = old_start, to = new_start;
+
+       while (size) {
+               unsigned long cur_size;
+               uint64_t from = src_node_start, to = dst_node_start;
                struct dma_fence *next;
 
-               if (old_mem->mem_type == TTM_PL_TT &&
-                   !amdgpu_gtt_mgr_is_allocated(old_mem)) {
-                       r = amdgpu_map_buffer(bo, old_mem, cur_pages,
-                                             old_start, 0, ring, &from);
+               /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
+                * begins at an offset, then adjust the size accordingly
+                */
+               cur_size = min3(min(src_node_size, dst_node_size), size,
+                               GTT_MAX_BYTES);
+               if (cur_size + src_page_offset > GTT_MAX_BYTES ||
+                   cur_size + dst_page_offset > GTT_MAX_BYTES)
+                       cur_size -= max(src_page_offset, dst_page_offset);
+
+               /* Map only what needs to be accessed. Map src to window 0 and
+                * dst to window 1
+                */
+               if (src->mem->mem_type == TTM_PL_TT &&
+                   !amdgpu_gtt_mgr_is_allocated(src->mem)) {
+                       r = amdgpu_map_buffer(src->bo, src->mem,
+                                       PFN_UP(cur_size + src_page_offset),
+                                       src_node_start, 0, ring,
+                                       &from);
                        if (r)
                                goto error;
+                       /* Adjust the offset because amdgpu_map_buffer returns
+                        * start of mapped page
+                        */
+                       from += src_page_offset;
                }
 
-               if (new_mem->mem_type == TTM_PL_TT &&
-                   !amdgpu_gtt_mgr_is_allocated(new_mem)) {
-                       r = amdgpu_map_buffer(bo, new_mem, cur_pages,
-                                             new_start, 1, ring, &to);
+               if (dst->mem->mem_type == TTM_PL_TT &&
+                   !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
+                       r = amdgpu_map_buffer(dst->bo, dst->mem,
+                                       PFN_UP(cur_size + dst_page_offset),
+                                       dst_node_start, 1, ring,
+                                       &to);
                        if (r)
                                goto error;
+                       to += dst_page_offset;
                }
 
-               r = amdgpu_copy_buffer(ring, from, to,
-                                      cur_pages * PAGE_SIZE,
-                                      bo->resv, &next, false, true);
+               r = amdgpu_copy_buffer(ring, from, to, cur_size,
+                                      resv, &next, false, true);
                if (r)
                        goto error;
 
                dma_fence_put(fence);
                fence = next;
 
-               num_pages -= cur_pages;
-               if (!num_pages)
+               size -= cur_size;
+               if (!size)
                        break;
 
-               old_size -= cur_pages;
-               if (!old_size) {
-                       old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
-                       old_size = old_mm->size;
+               src_node_size -= cur_size;
+               if (!src_node_size) {
+                       src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
+                                                            src->mem);
+                       src_node_size = (src_mm->size << PAGE_SHIFT);
                } else {
-                       old_start += cur_pages * PAGE_SIZE;
+                       src_node_start += cur_size;
+                       src_page_offset = src_node_start & (PAGE_SIZE - 1);
                }
-
-               new_size -= cur_pages;
-               if (!new_size) {
-                       new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
-                       new_size = new_mm->size;
+               dst_node_size -= cur_size;
+               if (!dst_node_size) {
+                       dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
+                                                            dst->mem);
+                       dst_node_size = (dst_mm->size << PAGE_SHIFT);
                } else {
-                       new_start += cur_pages * PAGE_SIZE;
+                       dst_node_start += cur_size;
+                       dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
                }
        }
+error:
        mutex_unlock(&adev->mman.gtt_window_lock);
+       if (f)
+               *f = dma_fence_get(fence);
+       dma_fence_put(fence);
+       return r;
+}
+
+
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+                           bool evict, bool no_wait_gpu,
+                           struct ttm_mem_reg *new_mem,
+                           struct ttm_mem_reg *old_mem)
+{
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+       struct amdgpu_copy_mem src, dst;
+       struct dma_fence *fence = NULL;
+       int r;
+
+       src.bo = bo;
+       dst.bo = bo;
+       src.mem = old_mem;
+       dst.mem = new_mem;
+       src.offset = 0;
+       dst.offset = 0;
+
+       r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+                                      new_mem->num_pages << PAGE_SHIFT,
+                                      bo->resv, &fence);
+       if (r)
+               goto error;
 
        r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
        dma_fence_put(fence);
        return r;
 
 error:
-       mutex_unlock(&adev->mman.gtt_window_lock);
-
        if (fence)
                dma_fence_wait(fence, false);
        dma_fence_put(fence);
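For context on how the new helper is meant to be driven, here is a minimal sketch of a BO-to-BO copy built only from the amdgpu_ttm_copy_mem_to_mem() signature and the amdgpu_copy_mem fields used in the hunk above; the wrapper name, the choice of reservation object and the synchronous wait at the end are illustrative assumptions, not part of this patch:

/* Illustrative only: copy @size bytes from @src_bo to @dst_bo with the
 * helper introduced above. Assumes both BOs are already reserved by the
 * caller and that waiting synchronously on the last fence is acceptable.
 */
static int example_bo_to_bo_copy(struct amdgpu_device *adev,
                                 struct amdgpu_bo *src_bo, uint64_t src_offset,
                                 struct amdgpu_bo *dst_bo, uint64_t dst_offset,
                                 uint64_t size)
{
        struct amdgpu_copy_mem src, dst;
        struct dma_fence *fence = NULL;
        int r;

        src.bo = &src_bo->tbo;
        src.mem = &src_bo->tbo.mem;
        src.offset = src_offset;

        dst.bo = &dst_bo->tbo;
        dst.mem = &dst_bo->tbo.mem;
        dst.offset = dst_offset;

        r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, size,
                                       dst_bo->tbo.resv, &fence);
        if (r)
                return r;

        /* The helper hands back the last fence of the submitted copy jobs;
         * wait for it before declaring the copy done.
         */
        if (fence)
                r = dma_fence_wait(fence, false);
        dma_fence_put(fence);
        return r;
}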
@@ -484,7 +565,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
        int r;
 
        /* Can't move a pinned BO */
-       abo = container_of(bo, struct amdgpu_bo, tbo);
+       abo = ttm_to_amdgpu_bo(bo);
        if (WARN_ON_ONCE(abo->pin_count > 0))
                return -EINVAL;
 
@@ -582,13 +663,12 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
                                           unsigned long page_offset)
 {
-       struct drm_mm_node *mm = bo->mem.mm_node;
-       uint64_t size = mm->size;
-       uint64_t offset = page_offset;
+       struct drm_mm_node *mm;
+       unsigned long offset = (page_offset << PAGE_SHIFT);
 
-       page_offset = do_div(offset, size);
-       mm += offset;
-       return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
+       mm = amdgpu_find_mm_node(&bo->mem, &offset);
+       return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
+               (offset >> PAGE_SHIFT);
 }
 
 /*
@@ -1142,9 +1222,9 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
                                    unsigned long offset,
                                    void *buf, int len, int write)
 {
-       struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+       struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
        struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
-       struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+       struct drm_mm_node *nodes;
        uint32_t value = 0;
        int ret = 0;
        uint64_t pos;
@@ -1153,10 +1233,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
        if (bo->mem.mem_type != TTM_PL_VRAM)
                return -EIO;
 
-       while (offset >= (nodes->size << PAGE_SHIFT)) {
-               offset -= nodes->size << PAGE_SHIFT;
-               ++nodes;
-       }
+       nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
        pos = (nodes->start << PAGE_SHIFT) + offset;
 
        while (len && pos < adev->mc.mc_vram_size) {
@@ -1255,6 +1332,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
        /* Change the size here instead of the init above so only lpfn is affected */
        amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
 
+       /*
+        *The reserved vram for firmware must be pinned to the specified
+        *place on the VRAM, so reserve it early.
+        */
+       r = amdgpu_fw_reserve_vram_init(adev);
+       if (r) {
+               return r;
+       }
+
        r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE,
                                    AMDGPU_GEM_DOMAIN_VRAM,
                                    &adev->stolen_vga_memory,
@@ -1479,7 +1565,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
        job->vm_needs_flush = vm_needs_flush;
        if (resv) {
                r = amdgpu_sync_resv(adev, &job->sync, resv,
-                                    AMDGPU_FENCE_OWNER_UNDEFINED);
+                                    AMDGPU_FENCE_OWNER_UNDEFINED,
+                                    false);
                if (r) {
                        DRM_ERROR("sync failed (%d).\n", r);
                        goto error_free;
@@ -1571,7 +1658,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 
        if (resv) {
                r = amdgpu_sync_resv(adev, &job->sync, resv,
-                                    AMDGPU_FENCE_OWNER_UNDEFINED);
+                                    AMDGPU_FENCE_OWNER_UNDEFINED, false);
                if (r) {
                        DRM_ERROR("sync failed (%d).\n", r);
                        goto error_free;
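As a closing aside, the node walk that amdgpu_find_mm_node() centralizes (and that amdgpu_ttm_io_mem_pfn() and amdgpu_ttm_access_memory() now reuse instead of open-coding) can be illustrated with a small self-contained mock; the struct, constants and values below are invented for the illustration and are not driver code:

/* Standalone illustration of the drm_mm_node walk performed by
 * amdgpu_find_mm_node(): subtract whole node sizes from the offset
 * until the offset falls inside a node. Types and values are mocked.
 */
#include <stdio.h>

#define MOCK_PAGE_SHIFT 12

struct mock_node {
        unsigned long start;    /* first backing page of the node */
        unsigned long size;     /* node size in pages */
};

static struct mock_node *find_node(struct mock_node *node,
                                   unsigned long *offset)
{
        while (*offset >= (node->size << MOCK_PAGE_SHIFT)) {
                *offset -= node->size << MOCK_PAGE_SHIFT;
                ++node;
        }
        return node;
}

int main(void)
{
        /* A BO scattered over three nodes of 4, 2 and 8 pages. */
        struct mock_node nodes[] = {
                { .start = 0x100, .size = 4 },
                { .start = 0x300, .size = 2 },
                { .start = 0x800, .size = 8 },
        };
        unsigned long offset = 5 << MOCK_PAGE_SHIFT;    /* page 5 of the BO */
        struct mock_node *n = find_node(nodes, &offset);

        /* Prints the backing page: node start 0x300 + page 1 = 0x301. */
        printf("page 0x%lx, offset in node 0x%lx\n",
               n->start + (offset >> MOCK_PAGE_SHIFT), offset);
        return 0;
}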