author		John Brooks <john@fastquake.com>	2017-06-27 22:33:18 -0400
committer	Alex Deucher <alexander.deucher@amd.com>	2017-07-14 11:06:33 -0400
commit		00f06b246a3056bbaa901a90a5a93c9f81ab8e36
tree		097060002ba3b1340935941ff5a12c9fb5a60cec /drivers/gpu/drm/amd
parent		218b5dcde4d30e071eec4201a36af665ccfa7e1c
drm/amdgpu: Throttle visible VRAM moves separately
The BO move throttling code is designed to allow VRAM to fill quickly if it
is relatively empty. However, this does not take into account situations
where the visible VRAM is smaller than total VRAM, and total VRAM may not be
close to full but the visible VRAM segment is under pressure. In such
situations, visible VRAM would experience unrestricted swapping and
performance would drop.

Add a separate counter specifically for moves involving visible VRAM, and
check it before moving BOs there.

v2: Only perform calculations for separate counter if visible VRAM is
    smaller than total VRAM. (Michel Dänzer)
v3: [Michel Dänzer]
    * Use BO's location rather than the
      AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag to determine whether to
      account a move for visible VRAM in most cases.
    * Use a single if (adev->mc.visible_vram_size < adev->mc.real_vram_size)
      block in amdgpu_cs_get_threshold_for_moves.

Fixes: 95844d20ae02 (drm/amdgpu: throttle buffer migrations at CS using a fixed MBps limit (v2))
Signed-off-by: John Brooks <john@fastquake.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
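In outline, the scheme keeps two throttling budgets instead of one. The
sketch below is a simplified, userspace-compilable illustration only, not
the driver source: the typedefs stand in for the kernel's s64/u64, the
locking and amdgpu_device plumbing are omitted, and the shift-based unit
conversion follows the existing us_to_bytes()/bytes_to_us() helpers in
amdgpu_cs.c.

/* Minimal sketch of the dual-budget throttling. accum_us grows by one
 * million per second, so shifting by log2 of the MB/s limit converts a
 * microsecond allowance into a byte budget and back. */
#include <stdint.h>

typedef int64_t s64;	/* stand-ins for the kernel's fixed-width types */
typedef uint64_t u64;

struct mm_stats {
	s64 accum_us;		/* allowance for all BO moves, in microseconds */
	s64 accum_us_vis;	/* separate allowance for CPU-visible VRAM */
	unsigned log2_max_MBps;
};

static u64 us_to_bytes(const struct mm_stats *s, s64 us)
{
	return us > 0 ? (u64)us << s->log2_max_MBps : 0;
}

static s64 bytes_to_us(const struct mm_stats *s, u64 bytes)
{
	return (s64)(bytes >> s->log2_max_MBps);
}

/* After a submission, both budgets pay for what was actually moved;
 * bytes that landed in visible VRAM are charged to both. */
static void report_moved_bytes(struct mm_stats *s, u64 num_bytes,
			       u64 num_vis_bytes)
{
	s->accum_us -= bytes_to_us(s, num_bytes);
	s->accum_us_vis -= bytes_to_us(s, num_vis_bytes);
}

A buffer is then only moved into visible VRAM while p->bytes_moved_vis
remains below p->bytes_moved_vis_threshold, as the amdgpu_cs_bo_validate()
hunk below checks.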
Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu.h		6
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c		92
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_object.c	12
3 files changed, 87 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0d6b0617cdf0..c290b262d7da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1117,7 +1117,9 @@ struct amdgpu_cs_parser {
 	struct list_head	validated;
 	struct dma_fence	*fence;
 	uint64_t		bytes_moved_threshold;
+	uint64_t		bytes_moved_vis_threshold;
 	uint64_t		bytes_moved;
+	uint64_t		bytes_moved_vis;
 	struct amdgpu_bo_list_entry	*evictable;
 
 	/* user fence */
@@ -1555,6 +1557,7 @@ struct amdgpu_device {
 		spinlock_t	lock;
 		s64		last_update_us;
 		s64		accum_us; /* accumulated microseconds */
+		s64		accum_us_vis; /* for visible VRAM */
 		u32		log2_max_MBps;
 	} mm_stats;
 
@@ -1846,7 +1849,8 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+				  u64 num_vis_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5599c01b265d..33789510e663 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
  * ticks. The accumulated microseconds (us) are converted to bytes and
  * returned.
  */
-static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
+static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
+					      u64 *max_bytes,
+					      u64 *max_vis_bytes)
 {
 	s64 time_us, increment_us;
-	u64 max_bytes;
 	u64 free_vram, total_vram, used_vram;
 
 	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
@@ -238,8 +239,11 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 	 */
 	const s64 us_upper_bound = 200000;
 
-	if (!adev->mm_stats.log2_max_MBps)
-		return 0;
+	if (!adev->mm_stats.log2_max_MBps) {
+		*max_bytes = 0;
+		*max_vis_bytes = 0;
+		return;
+	}
 
 	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
 	used_vram = atomic64_read(&adev->vram_usage);
@@ -280,23 +284,45 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
 	}
 
-	/* This returns 0 if the driver is in debt to disallow (optional)
+	/* This is set to 0 if the driver is in debt to disallow (optional)
 	 * buffer moves.
 	 */
-	max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+	/* Do the same for visible VRAM if half of it is free */
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
+		u64 total_vis_vram = adev->mc.visible_vram_size;
+		u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage);
+
+		if (used_vis_vram < total_vis_vram) {
+			u64 free_vis_vram = total_vis_vram - used_vis_vram;
+			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
+							  increment_us, us_upper_bound);
+
+			if (free_vis_vram >= total_vis_vram / 2)
+				adev->mm_stats.accum_us_vis =
+					max(bytes_to_us(adev, free_vis_vram / 2),
+					    adev->mm_stats.accum_us_vis);
+		}
+
+		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
+	} else {
+		*max_vis_bytes = 0;
+	}
 
 	spin_unlock(&adev->mm_stats.lock);
-	return max_bytes;
 }
 
 /* Report how many bytes have really been moved for the last command
  * submission. This can result in a debt that can stop buffer migrations
  * temporarily.
  */
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+				  u64 num_vis_bytes)
 {
 	spin_lock(&adev->mm_stats.lock);
 	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
 	spin_unlock(&adev->mm_stats.lock);
 }
 
@@ -304,7 +330,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 			    struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
 	uint32_t domain;
 	int r;
 
@@ -314,17 +340,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	/* Don't move this buffer if we have depleted our allowance
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
-	if (p->bytes_moved < p->bytes_moved_threshold)
-		domain = bo->prefered_domains;
-	else
+	if (p->bytes_moved < p->bytes_moved_threshold) {
+		if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
+			 * visible VRAM if we've depleted our allowance to do
+			 * that.
+			 */
+			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+				domain = bo->prefered_domains;
+			else
+				domain = bo->allowed_domains;
+		} else {
+			domain = bo->prefered_domains;
+		}
+	} else {
 		domain = bo->allowed_domains;
+	}
 
 retry:
 	amdgpu_ttm_placement_from_domain(bo, domain);
 	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-	p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-		initial_bytes_moved;
+	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+		      initial_bytes_moved;
+	p->bytes_moved += bytes_moved;
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+		p->bytes_moved_vis += bytes_moved;
 
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
 		domain = bo->allowed_domains;
@@ -350,7 +394,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo_list_entry *candidate = p->evictable;
 	struct amdgpu_bo *bo = candidate->robj;
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
+	bool update_bytes_moved_vis;
 	uint32_t other;
 
 	/* If we reached our current BO we can forget it */
@@ -370,10 +415,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 
 		/* Good we can try to move this BO somewhere else */
 		amdgpu_ttm_placement_from_domain(bo, other);
+		update_bytes_moved_vis =
+			adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+			bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+			bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
 		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-		p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-			initial_bytes_moved;
+		bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+			      initial_bytes_moved;
+		p->bytes_moved += bytes_moved;
+		if (update_bytes_moved_vis)
+			p->bytes_moved_vis += bytes_moved;
 
 		if (unlikely(r))
 			break;
@@ -554,8 +606,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}
 
-	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
+	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
+					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
+	p->bytes_moved_vis = 0;
 	p->evictable = list_last_entry(&p->validated,
 				       struct amdgpu_bo_list_entry,
 				       tv.head);
@@ -579,8 +633,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			goto error_validate;
 	}
 
-	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
-
+	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+				     p->bytes_moved_vis);
 	fpriv->vm.last_eviction_counter =
 		atomic64_read(&p->adev->num_evictions);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index a85e75327456..e429829ae93d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -322,7 +322,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	struct amdgpu_bo *bo;
 	enum ttm_bo_type type;
 	unsigned long page_align;
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
 	size_t acc_size;
 	int r;
 
@@ -398,8 +398,14 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
 				 &bo->placement, page_align, !kernel, NULL,
 				 acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
-	amdgpu_cs_report_moved_bytes(adev,
-		atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);
+	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+		      initial_bytes_moved;
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+		amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved);
+	else
+		amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0);
 
 	if (unlikely(r != 0))
 		return r;
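The placement test that decides whether a move is charged against the
visible-VRAM budget is open-coded three times in this patch (CS validation,
eviction, and buffer creation). Pulled out as a predicate it reads roughly
as below; PAGE_SHIFT, TTM_PL_VRAM, and the two structs are pared-down
stand-ins, kept only so the sketch is self-contained:

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SHIFT 12	/* 4 KiB pages, as on x86; a stand-in here */
#define TTM_PL_VRAM 2	/* stand-in for the TTM placement identifier */

struct mem_reg { uint32_t mem_type; uint64_t start; /* page number */ };
struct mc_info { uint64_t visible_vram_size, real_vram_size; /* bytes */ };

/* A move counts as visible only when the CPU-visible aperture is smaller
 * than total VRAM, the BO ended up in VRAM, and its first page lies inside
 * the visible window. mem->start is a page number, hence the >> PAGE_SHIFT
 * on the byte-sized aperture limit. */
static bool counts_as_visible(const struct mc_info *mc,
			      const struct mem_reg *mem)
{
	return mc->visible_vram_size < mc->real_vram_size &&
	       mem->mem_type == TTM_PL_VRAM &&
	       mem->start < mc->visible_vram_size >> PAGE_SHIFT;
}

Note the test looks only at the BO's start page, so a BO straddling the
aperture boundary is still charged in full, matching the patch's accounting.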