author		John Brooks <john@fastquake.com>	2017-06-27 22:33:18 -0400
committer	Alex Deucher <alexander.deucher@amd.com>	2017-07-14 11:06:33 -0400
commit		00f06b246a3056bbaa901a90a5a93c9f81ab8e36
tree		097060002ba3b1340935941ff5a12c9fb5a60cec /drivers/gpu/drm/amd
parent		218b5dcde4d30e071eec4201a36af665ccfa7e1c
drm/amdgpu: Throttle visible VRAM moves separately
The BO move throttling code is designed to allow VRAM to fill quickly if it
is relatively empty. However, this does not take into account situations
where the visible VRAM is smaller than total VRAM, and total VRAM may not be
close to full but the visible VRAM segment is under pressure. In such
situations, visible VRAM would experience unrestricted swapping and
performance would drop.

Add a separate counter specifically for moves involving visible VRAM, and
check it before moving BOs there.

v2: Only perform calculations for separate counter if visible VRAM is
    smaller than total VRAM. (Michel Dänzer)
v3: [Michel Dänzer]
    * Use BO's location rather than the
      AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag to determine whether to
      account a move for visible VRAM in most cases.
    * Use a single if (adev->mc.visible_vram_size < adev->mc.real_vram_size)
      block in amdgpu_cs_get_threshold_for_moves.

Fixes: 95844d20ae02 (drm/amdgpu: throttle buffer migrations at CS using a fixed MBps limit (v2))
Signed-off-by: John Brooks <john@fastquake.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
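In outline, the scheme keeps two throttling budgets instead of one. The
sketch below is a simplified, userspace-compilable illustration only, not
the driver source: the typedefs stand in for the kernel's s64/u64, the
locking and amdgpu_device plumbing are omitted, and the shift-based unit
conversion follows the existing us_to_bytes()/bytes_to_us() helpers in
amdgpu_cs.c.

/* Minimal sketch of the dual-budget throttling. accum_us grows by one
 * million per second, so shifting by log2 of the MB/s limit converts a
 * microsecond allowance into a byte budget and back. */
#include <stdint.h>

typedef int64_t s64;	/* stand-ins for the kernel's fixed-width types */
typedef uint64_t u64;

struct mm_stats {
	s64 accum_us;		/* allowance for all BO moves, in microseconds */
	s64 accum_us_vis;	/* separate allowance for CPU-visible VRAM */
	unsigned log2_max_MBps;
};

static u64 us_to_bytes(const struct mm_stats *s, s64 us)
{
	return us > 0 ? (u64)us << s->log2_max_MBps : 0;
}

static s64 bytes_to_us(const struct mm_stats *s, u64 bytes)
{
	return (s64)(bytes >> s->log2_max_MBps);
}

/* After a submission, both budgets pay for what was actually moved;
 * bytes that landed in visible VRAM are charged to both. */
static void report_moved_bytes(struct mm_stats *s, u64 num_bytes,
			       u64 num_vis_bytes)
{
	s->accum_us -= bytes_to_us(s, num_bytes);
	s->accum_us_vis -= bytes_to_us(s, num_vis_bytes);
}

A buffer is then only moved into visible VRAM while p->bytes_moved_vis
remains below p->bytes_moved_vis_threshold, as the amdgpu_cs_bo_validate()
hunk below checks.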
Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu.h		6
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c		92
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_object.c	12
3 files changed, 87 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0d6b0617cdf0..c290b262d7da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1117,7 +1117,9 @@ struct amdgpu_cs_parser {
 	struct list_head	validated;
 	struct dma_fence	*fence;
 	uint64_t		bytes_moved_threshold;
+	uint64_t		bytes_moved_vis_threshold;
 	uint64_t		bytes_moved;
+	uint64_t		bytes_moved_vis;
 	struct amdgpu_bo_list_entry	*evictable;
 
 	/* user fence */
@@ -1555,6 +1557,7 @@ struct amdgpu_device {
 		spinlock_t	lock;
 		s64		last_update_us;
 		s64		accum_us; /* accumulated microseconds */
+		s64		accum_us_vis; /* for visible VRAM */
 		u32		log2_max_MBps;
 	} mm_stats;
 
@@ -1846,7 +1849,8 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+				  u64 num_vis_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5599c01b265d..33789510e663 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
  * ticks. The accumulated microseconds (us) are converted to bytes and
  * returned.
  */
-static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
+static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
+					      u64 *max_bytes,
+					      u64 *max_vis_bytes)
 {
 	s64 time_us, increment_us;
-	u64 max_bytes;
 	u64 free_vram, total_vram, used_vram;
 
 	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
@@ -238,8 +239,11 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 	 */
 	const s64 us_upper_bound = 200000;
 
-	if (!adev->mm_stats.log2_max_MBps)
-		return 0;
+	if (!adev->mm_stats.log2_max_MBps) {
+		*max_bytes = 0;
+		*max_vis_bytes = 0;
+		return;
+	}
 
 	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
 	used_vram = atomic64_read(&adev->vram_usage);
@@ -280,23 +284,45 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
 	}
 
-	/* This returns 0 if the driver is in debt to disallow (optional)
+	/* This is set to 0 if the driver is in debt to disallow (optional)
 	 * buffer moves.
 	 */
-	max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+	/* Do the same for visible VRAM if half of it is free */
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
+		u64 total_vis_vram = adev->mc.visible_vram_size;
+		u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage);
+
+		if (used_vis_vram < total_vis_vram) {
+			u64 free_vis_vram = total_vis_vram - used_vis_vram;
+			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
+							  increment_us, us_upper_bound);
+
+			if (free_vis_vram >= total_vis_vram / 2)
+				adev->mm_stats.accum_us_vis =
+					max(bytes_to_us(adev, free_vis_vram / 2),
+					    adev->mm_stats.accum_us_vis);
+		}
+
+		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
+	} else {
+		*max_vis_bytes = 0;
+	}
 
 	spin_unlock(&adev->mm_stats.lock);
-	return max_bytes;
 }
 
 /* Report how many bytes have really been moved for the last command
  * submission. This can result in a debt that can stop buffer migrations
  * temporarily.
  */
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+				  u64 num_vis_bytes)
 {
 	spin_lock(&adev->mm_stats.lock);
 	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
 	spin_unlock(&adev->mm_stats.lock);
 }
 
@@ -304,7 +330,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 			    struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
 	uint32_t domain;
 	int r;
 
@@ -314,17 +340,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	/* Don't move this buffer if we have depleted our allowance
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
-	if (p->bytes_moved < p->bytes_moved_threshold)
-		domain = bo->prefered_domains;
-	else
+	if (p->bytes_moved < p->bytes_moved_threshold) {
+		if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
+			 * visible VRAM if we've depleted our allowance to do
+			 * that.
+			 */
+			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+				domain = bo->prefered_domains;
+			else
+				domain = bo->allowed_domains;
+		} else {
+			domain = bo->prefered_domains;
+		}
+	} else {
 		domain = bo->allowed_domains;
+	}
 
 retry:
 	amdgpu_ttm_placement_from_domain(bo, domain);
 	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-	p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-		initial_bytes_moved;
+	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+		      initial_bytes_moved;
+	p->bytes_moved += bytes_moved;
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+		p->bytes_moved_vis += bytes_moved;
 
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
 		domain = bo->allowed_domains;
@@ -350,7 +394,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo_list_entry *candidate = p->evictable;
 	struct amdgpu_bo *bo = candidate->robj;
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
+	bool update_bytes_moved_vis;
 	uint32_t other;
 
 	/* If we reached our current BO we can forget it */
@@ -370,10 +415,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 
 		/* Good we can try to move this BO somewhere else */
 		amdgpu_ttm_placement_from_domain(bo, other);
+		update_bytes_moved_vis =
+			adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+			bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+			bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
 		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-		p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-			initial_bytes_moved;
+		bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+			      initial_bytes_moved;
+		p->bytes_moved += bytes_moved;
+		if (update_bytes_moved_vis)
+			p->bytes_moved_vis += bytes_moved;
 
 		if (unlikely(r))
 			break;
@@ -554,8 +606,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}
 
-	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
+	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
+					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
+	p->bytes_moved_vis = 0;
 	p->evictable = list_last_entry(&p->validated,
 				       struct amdgpu_bo_list_entry,
 				       tv.head);
@@ -579,8 +633,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			goto error_validate;
 	}
 
-	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
-
+	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+				     p->bytes_moved_vis);
 	fpriv->vm.last_eviction_counter =
 		atomic64_read(&p->adev->num_evictions);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index a85e75327456..e429829ae93d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -322,7 +322,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	struct amdgpu_bo *bo;
 	enum ttm_bo_type type;
 	unsigned long page_align;
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
 	size_t acc_size;
 	int r;
 
@@ -398,8 +398,14 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
 				 &bo->placement, page_align, !kernel, NULL,
 				 acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
-	amdgpu_cs_report_moved_bytes(adev,
-		atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);
+	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+		      initial_bytes_moved;
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+		amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved);
+	else
+		amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0);
 
 	if (unlikely(r != 0))
 		return r;
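The placement test that decides whether a move is charged against the
visible-VRAM budget is open-coded three times in this patch (CS validation,
eviction, and buffer creation). Pulled out as a predicate it reads roughly
as below; PAGE_SHIFT, TTM_PL_VRAM, and the two structs are pared-down
stand-ins, kept only so the sketch is self-contained:

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SHIFT 12	/* 4 KiB pages, as on x86; a stand-in here */
#define TTM_PL_VRAM 2	/* stand-in for the TTM placement identifier */

struct mem_reg { uint32_t mem_type; uint64_t start; /* page number */ };
struct mc_info { uint64_t visible_vram_size, real_vram_size; /* bytes */ };

/* A move counts as visible only when the CPU-visible aperture is smaller
 * than total VRAM, the BO ended up in VRAM, and its first page lies inside
 * the visible window. mem->start is a page number, hence the >> PAGE_SHIFT
 * on the byte-sized aperture limit. */
static bool counts_as_visible(const struct mc_info *mc,
			      const struct mem_reg *mem)
{
	return mc->visible_vram_size < mc->real_vram_size &&
	       mem->mem_type == TTM_PL_VRAM &&
	       mem->start < mc->visible_vram_size >> PAGE_SHIFT;
}

Note the test looks only at the BO's start page, so a BO straddling the
aperture boundary is still charged in full, matching the patch's accounting.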