author     Marek Olšák <marek.olsak@amd.com>          2014-03-01 18:56:22 -0500
committer  Christian König <christian.koenig@amd.com> 2014-03-03 05:00:24 -0500
commit     19dff56a5f4ba1f3a6e28282415a95a48c27bccf
tree       fe3f4ddccdfe7846a6bc9deace1e6e130182854c
parent     c9b76548899cde2e729e3bca015d7e78ec5baad7
drm/radeon: limit how much memory TTM can move per IB according to VRAM usage
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
 drivers/gpu/drm/radeon/radeon_cs.c     |  2 +-
 drivers/gpu/drm/radeon/radeon_object.c | 88 ++++++++++++++++++++++++++++++---
 drivers/gpu/drm/radeon/radeon_object.h |  3 ++-
 3 files changed, 85 insertions(+), 8 deletions(-)
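The heart of the change is a piecewise-linear threshold on how many bytes of buffer moves a single indirect buffer (IB) may trigger, derived from current VRAM usage. Before the per-file diffs, here is a minimal user-space model of that computation; the 2 GiB VRAM size and the usage steps are invented example values, not anything taken from the commit:

/* Standalone model of the threshold curve added in radeon_object.c below.
 * Not kernel code: the VRAM size and usage steps are made-up examples. */
#include <stdio.h>
#include <stdint.h>

static uint64_t threshold_for_moves(uint64_t real_vram_size, uint64_t vram_usage)
{
        uint64_t half_vram = real_vram_size >> 1;
        uint64_t half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
        uint64_t threshold = half_free_vram >> 1;
        const uint64_t one_mb = 1024 * 1024;

        return threshold > one_mb ? threshold : one_mb; /* 1 MB floor */
}

int main(void)
{
        uint64_t vram = 2048ULL << 20; /* hypothetical 2 GiB card */
        uint64_t usage;

        for (usage = 0; usage <= vram; usage += vram / 4)
                printf("used %4llu MiB -> threshold %3llu MiB\n",
                       (unsigned long long)(usage >> 20),
                       (unsigned long long)(threshold_for_moves(vram, usage) >> 20));
        return 0;
}

At 0% usage the model prints a 512 MiB threshold (1/4 of VRAM), at 25% it prints 256 MiB, and from 50% usage onward it clamps to the 1 MiB floor, matching the diagram in the comment added to radeon_object.c.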
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 07e165128dbf..5abae403ea4f 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -168,7 +168,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 
         radeon_cs_buckets_get_list(&buckets, &p->validated);
 
-        return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring);
+        return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
 }
 
 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 8399fe021769..ed03f2d15853 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -366,29 +366,105 @@ void radeon_bo_fini(struct radeon_device *rdev)
         arch_phys_wc_del(rdev->mc.vram_mtrr);
 }
 
-int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
+/* Returns how many bytes TTM can move per IB.
+ */
+static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
+{
+        u64 real_vram_size = rdev->mc.real_vram_size;
+        u64 vram_usage = atomic64_read(&rdev->vram_usage);
+
+        /* This function is based on the current VRAM usage.
+         *
+         * - If all of VRAM is free, allow relocating the number of bytes that
+         *   is equal to 1/4 of the size of VRAM for this IB.
+         *
+         * - If more than one half of VRAM is occupied, only allow relocating
+         *   1 MB of data for this IB.
+         *
+         * - From 0 to one half of used VRAM, the threshold decreases
+         *   linearly.
+         *        __________________
+         * 1/4 of -|\               |
+         * VRAM    | \              |
+         *         |  \             |
+         *         |   \            |
+         *         |    \           |
+         *         |     \          |
+         *         |      \         |
+         *         |       \________|1 MB
+         *         |----------------|
+         *    VRAM 0 %             100 %
+         *         used            used
+         *
+         * Note: It's a threshold, not a limit. The threshold must be crossed
+         * for buffer relocations to stop, so any buffer of an arbitrary size
+         * can be moved as long as the threshold isn't crossed before
+         * the relocation takes place. We don't want to disable buffer
+         * relocations completely.
+         *
+         * The idea is that buffers should be placed in VRAM at creation time
+         * and TTM should only do a minimum number of relocations during
+         * command submission. In practice, you need to submit at least
+         * a dozen IBs to move all buffers to VRAM if they are in GTT.
+         *
+         * Also, things can get pretty crazy under memory pressure and actual
+         * VRAM usage can change a lot, so playing safe even at 50% does
+         * consistently increase performance.
+         */
+
+        u64 half_vram = real_vram_size >> 1;
+        u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
+        u64 bytes_moved_threshold = half_free_vram >> 1;
+        return max(bytes_moved_threshold, 1024*1024ull);
+}
+
+int radeon_bo_list_validate(struct radeon_device *rdev,
+                            struct ww_acquire_ctx *ticket,
                             struct list_head *head, int ring)
 {
         struct radeon_bo_list *lobj;
         struct radeon_bo *bo;
-        u32 domain;
         int r;
+        u64 bytes_moved = 0, initial_bytes_moved;
+        u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);
 
         r = ttm_eu_reserve_buffers(ticket, head);
         if (unlikely(r != 0)) {
                 return r;
         }
+
         list_for_each_entry(lobj, head, tv.head) {
                 bo = lobj->bo;
                 if (!bo->pin_count) {
-                        domain = lobj->domain;
-
+                        u32 domain = lobj->domain;
+                        u32 current_domain =
+                                radeon_mem_type_to_domain(bo->tbo.mem.mem_type);
+
+                        /* Check if this buffer will be moved and don't move it
+                         * if we have moved too many buffers for this IB already.
+                         *
+                         * Note that this allows moving at least one buffer of
+                         * any size, because it doesn't take the current "bo"
+                         * into account. We don't want to disallow buffer moves
+                         * completely.
+                         */
+                        if (current_domain != RADEON_GEM_DOMAIN_CPU &&
+                            (domain & current_domain) == 0 && /* will be moved */
+                            bytes_moved > bytes_moved_threshold) {
+                                /* don't move it */
+                                domain = current_domain;
+                        }
+
                 retry:
                         radeon_ttm_placement_from_domain(bo, domain);
                         if (ring == R600_RING_TYPE_UVD_INDEX)
                                 radeon_uvd_force_into_uvd_segment(bo);
-                        r = ttm_bo_validate(&bo->tbo, &bo->placement,
-                                            true, false);
+
+                        initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
+                        r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+                        bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
+                                       initial_bytes_moved;
+
                         if (unlikely(r)) {
                                 if (r != -ERESTARTSYS && domain != lobj->alt_domain) {
                                         domain = lobj->alt_domain;
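The placement decision above reduces to a bitmask test between the requested and current domains. A standalone sketch of that test follows; the flag values mirror the RADEON_GEM_DOMAIN_* constants from the radeon uapi header, but the buffer scenario and byte counts are invented for illustration:

/* Standalone model of the "will be moved" test in radeon_bo_list_validate().
 * The flag values mirror the RADEON_GEM_DOMAIN_* uapi constants; the buffer
 * scenario and byte counts are invented for illustration. */
#include <stdio.h>
#include <stdint.h>

#define DOMAIN_CPU  0x1
#define DOMAIN_GTT  0x2
#define DOMAIN_VRAM 0x4

int main(void)
{
        uint32_t domain = DOMAIN_VRAM;        /* where the CS asks for the BO */
        uint32_t current_domain = DOMAIN_GTT; /* where the BO currently lives */
        uint64_t bytes_moved = 600ULL << 20;  /* already moved 600 MiB */
        uint64_t threshold = 512ULL << 20;    /* example threshold: 512 MiB */

        /* No bit overlap between requested and current placement means
         * validation would migrate the BO; past the threshold, keep it
         * where it is instead. */
        if (current_domain != DOMAIN_CPU &&
            (domain & current_domain) == 0 &&
            bytes_moved > threshold)
                domain = current_domain;

        printf("validate in domain 0x%x\n", (unsigned)domain); /* 0x2: stays in GTT */
        return 0;
}

A GTT-resident buffer requested in VRAM shares no domain bits with its current placement, so validation would migrate it; once bytes_moved exceeds the threshold, the code validates it in place instead.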
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 6c3ca9edc2f4..7dff64d1f592 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -138,7 +138,8 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev);
 extern void radeon_bo_force_delete(struct radeon_device *rdev);
 extern int radeon_bo_init(struct radeon_device *rdev);
 extern void radeon_bo_fini(struct radeon_device *rdev);
-extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
+extern int radeon_bo_list_validate(struct radeon_device *rdev,
+                                   struct ww_acquire_ctx *ticket,
                                    struct list_head *head, int ring);
 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
                                 struct vm_area_struct *vma);
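The bytes_moved bookkeeping works by sampling the device-global num_bytes_moved counter immediately before and after each ttm_bo_validate() call; the counter itself is read, not introduced, by this patch and is assumed to be maintained by the TTM move path. Below is a sketch of that sampling pattern using C11 atomics in place of the kernel's atomic64_t; do_validate() is a hypothetical stand-in, and the remark about concurrency is an inference on the editor's part, not something the commit states:

/* Sketch of the before/after counter sampling wrapped around
 * ttm_bo_validate() above, using C11 atomics in place of the kernel's
 * atomic64_t. do_validate() is a hypothetical stand-in for a call that
 * may migrate a buffer and bump the global counter. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t num_bytes_moved;

static void do_validate(void)
{
        /* pretend validation migrated a 4 MiB buffer */
        atomic_fetch_add(&num_bytes_moved, 4ULL << 20);
}

int main(void)
{
        uint64_t bytes_moved = 0;
        uint64_t initial = atomic_load(&num_bytes_moved);

        do_validate();
        bytes_moved += atomic_load(&num_bytes_moved) - initial;

        /* With concurrent submissions the delta may include other threads'
         * moves; for a heuristic threshold that only makes the limit kick
         * in a little earlier, which is harmless. */
        printf("this IB moved %llu bytes\n", (unsigned long long)bytes_moved);
        return 0;
}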